From 149e0b6ae9871515be21f23b492f5ef7355e2ca4 Mon Sep 17 00:00:00 2001 From: Nathan Reiner Date: Fri, 14 Jul 2023 00:22:39 +0200 Subject: make fast using hash instead of dictionary --- src/dictionary.rs | 105 ------------------------------------------------------ 1 file changed, 105 deletions(-) delete mode 100644 src/dictionary.rs (limited to 'src/dictionary.rs') diff --git a/src/dictionary.rs b/src/dictionary.rs deleted file mode 100644 index 3e05b91..0000000 --- a/src/dictionary.rs +++ /dev/null @@ -1,105 +0,0 @@ -use std::collections::HashMap; -use crate::vector::FileVector; - -/// The dictionary is used to cache to words ids. -/// It also provides a function to convert it to -/// a vector and generate a FileVector from a word list -/// with the current directory. -#[derive(Clone, Debug)] -pub struct Dictionary { - last_index : usize, - data : HashMap, -} - -impl Default for Dictionary { - fn default() -> Self { - Self::new() - } -} - -impl Dictionary { - pub fn new() -> Self { - Self { last_index : 0, data : HashMap::new() } - } - - pub fn from_line(line : &str) -> Self { - let mut data : HashMap = HashMap::new(); - let mut i : usize = 0; - - for word in line.split(',') { - data.insert(word.to_string(), i as u64); - i += 1; - } - - Self { last_index : i - 1, data } - } - - pub fn set(&mut self, name : &String) { - if !self.data.contains_key(name) { - self.last_index += 1; - self.data.insert(name.clone(), self.last_index as u64); - } - } - - pub fn set_and_get(&mut self, name : &String) -> u64 { - if !self.data.contains_key(name) { - self.last_index += 1; - self.data.insert(name.clone(), self.last_index as u64); - self.last_index as u64 - } else { - *self.data.get(name).unwrap() - } - } - - pub fn get(&self, name : &String) -> Option<&u64> { - self.data.get(name) - } - - pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, u64> { - self.data.iter() - } - - pub fn to_list(&self) -> Vec { - let mut v = Vec::with_capacity(self.last_index + 1); - - v.resize(self.last_index + 1, "".to_string()); - - for (word, id) in self.iter() { - v[(*id) as usize] = word.clone(); - } - - v - } - - pub fn vectorize_word_list(&self, words : &Vec<&String>) -> FileVector { - let mut fv = FileVector::new(); - - for word in words { - let i = *self.get(word).unwrap(); - if !fv.contains_key(&i) { - fv.insert(i, 1); - } else { - let c = *fv.get(&i).unwrap(); - fv.insert(i, c + 1); - } - } - - fv - } - - pub fn insert_words_and_vectorize_word_list(&mut self, words : &Vec<&String>) -> FileVector { - let mut fv = FileVector::new(); - - for word in words { - let i = self.set_and_get(word); - if !fv.contains_key(&i) { - fv.insert(i, 1); - } else { - let c = *fv.get(&i).unwrap(); - fv.insert(i, c + 1); - } - } - - fv - } -} -- cgit v1.2.3-70-g09d2