use std::collections::HashMap; use crate::vector::FileVector; /// The dictionary is used to cache to words ids. /// It also provides a function to convert it to /// a vector and generate a FileVector from a word list /// with the current directory. #[derive(Clone, Debug)] pub struct Dictionary { last_index : usize, data : HashMap, } impl Default for Dictionary { fn default() -> Self { Self::new() } } impl Dictionary { pub fn new() -> Self { Self { last_index : 0, data : HashMap::new() } } pub fn from_line(line : &str) -> Self { let mut data : HashMap = HashMap::new(); let mut i : usize = 0; for word in line.split(',') { data.insert(word.to_string(), i as u64); i += 1; } Self { last_index : i - 1, data } } pub fn set(&mut self, name : String) { if let std::collections::hash_map::Entry::Vacant(e) = self.data.entry(name) { self.last_index += 1; e.insert(self.last_index as u64); } } pub fn get(&self, name : String) -> Option<&u64> { self.data.get(&name) } pub fn iter(&self) -> &HashMap { &self.data } pub fn to_list(&self) -> Vec { let mut v = Vec::with_capacity(self.last_index + 1); v.resize(self.last_index + 1, "".to_string()); for (word, id) in self.iter() { v[(*id) as usize] = word.clone(); } v } pub fn vectorize_word_list(&self, words : Vec) -> FileVector { let mut fv = FileVector::new(); for word in words { let i = *self.get(word).unwrap(); if !fv.contains_key(&i) { fv.insert(i, 1); } else { let c : u64 = *fv.get(&i).unwrap(); fv.insert(i, c + 1); } } fv } }