diff options
| author | Nathan Reiner <nathan@nathanreiner.xyz> | 2023-07-14 00:22:39 +0200 |
|---|---|---|
| committer | Nathan Reiner <nathan@nathanreiner.xyz> | 2023-07-14 00:22:39 +0200 |
| commit | 149e0b6ae9871515be21f23b492f5ef7355e2ca4 (patch) | |
| tree | d5b1bf8281a3a1cf181d5c921a53dfd99fd8b7a9 /src/dictionary.rs | |
| parent | 0723ea6b6bb6832b11582eeb8a330d2bdb6077b5 (diff) | |
make fast using hash instead of dictionary
Diffstat (limited to 'src/dictionary.rs')
| -rw-r--r-- | src/dictionary.rs | 105 |
1 files changed, 0 insertions, 105 deletions
```diff
diff --git a/src/dictionary.rs b/src/dictionary.rs
deleted file mode 100644
index 3e05b91..0000000
--- a/src/dictionary.rs
+++ /dev/null
@@ -1,105 +0,0 @@
-use std::collections::HashMap;
-use crate::vector::FileVector;
-
-/// The dictionary is used to cache to words ids.
-/// It also provides a function to convert it to
-/// a vector and generate a FileVector from a word list
-/// with the current directory.
-#[derive(Clone, Debug)]
-pub struct Dictionary {
-    last_index : usize,
-    data : HashMap<String, u64>,
-}
-
-impl Default for Dictionary {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl Dictionary {
-    pub fn new() -> Self {
-        Self { last_index : 0, data : HashMap::new() }
-    }
-
-    pub fn from_line(line : &str) -> Self {
-        let mut data : HashMap<String, u64> = HashMap::new();
-        let mut i : usize = 0;
-
-        for word in line.split(',') {
-            data.insert(word.to_string(), i as u64);
-            i += 1;
-        }
-
-        Self { last_index : i - 1, data }
-    }
-
-    pub fn set(&mut self, name : &String) {
-        if !self.data.contains_key(name) {
-            self.last_index += 1;
-            self.data.insert(name.clone(), self.last_index as u64);
-        }
-    }
-
-    pub fn set_and_get(&mut self, name : &String) -> u64 {
-        if !self.data.contains_key(name) {
-            self.last_index += 1;
-            self.data.insert(name.clone(), self.last_index as u64);
-            self.last_index as u64
-        } else {
-            *self.data.get(name).unwrap()
-        }
-    }
-
-    pub fn get(&self, name : &String) -> Option<&u64> {
-        self.data.get(name)
-    }
-
-    pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, u64> {
-        self.data.iter()
-    }
-
-    pub fn to_list(&self) -> Vec<String> {
-        let mut v = Vec::with_capacity(self.last_index + 1);
-
-        v.resize(self.last_index + 1, "".to_string());
-
-        for (word, id) in self.iter() {
-            v[(*id) as usize] = word.clone();
-        }
-
-        v
-    }
-
-    pub fn vectorize_word_list(&self, words : &Vec<&String>) -> FileVector {
-        let mut fv = FileVector::new();
-
-        for word in words {
-            let i = *self.get(word).unwrap();
-            if !fv.contains_key(&i) {
-                fv.insert(i, 1);
-            } else {
-                let c = *fv.get(&i).unwrap();
-                fv.insert(i, c + 1);
-            }
-        }
-
-        fv
-    }
-
-    pub fn insert_words_and_vectorize_word_list(&mut self, words : &Vec<&String>) -> FileVector {
-        let mut fv = FileVector::new();
-
-        for word in words {
-            let i = self.set_and_get(word);
-            if !fv.contains_key(&i) {
-                fv.insert(i, 1);
-            } else {
-                let c = *fv.get(&i).unwrap();
-                fv.insert(i, c + 1);
-            }
-        }
-
-        fv
-    }
-}
```