use std::collections::HashMap; use crate::vector::FileVector; /// The dictionary is used to cache to words ids. /// It also provides a function to convert it to /// a vector and generate a FileVector from a word list /// with the current directory. #[derive(Clone, Debug)] pub struct Dictionary { last_index : usize, data : HashMap, } impl Default for Dictionary { fn default() -> Self { Self::new() } } impl Dictionary { pub fn new() -> Self { Self { last_index : 0, data : HashMap::new() } } pub fn from_line(line : &str) -> Self { let mut data : HashMap = HashMap::new(); let mut i : usize = 0; for word in line.split(',') { data.insert(word.to_string(), i as u64); i += 1; } Self { last_index : i - 1, data } } pub fn set(&mut self, name : &String) { if !self.data.contains_key(name) { self.last_index += 1; self.data.insert(name.clone(), self.last_index as u64); } } pub fn set_and_get(&mut self, name : &String) -> u64 { if !self.data.contains_key(name) { self.last_index += 1; self.data.insert(name.clone(), self.last_index as u64); self.last_index as u64 } else { *self.data.get(name).unwrap() } } pub fn get(&self, name : &String) -> Option<&u64> { self.data.get(name) } pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, u64> { self.data.iter() } pub fn to_list(&self) -> Vec { let mut v = Vec::with_capacity(self.last_index + 1); v.resize(self.last_index + 1, "".to_string()); for (word, id) in self.iter() { v[(*id) as usize] = word.clone(); } v } pub fn vectorize_word_list(&self, words : &Vec<&String>) -> FileVector { let mut fv = FileVector::new(); for word in words { let i = *self.get(word).unwrap(); if !fv.contains_key(&i) { fv.insert(i, 1); } else { let c : u64 = *fv.get(&i).unwrap(); fv.insert(i, c + 1); } } fv } pub fn insert_words_and_vectorize_word_list(&mut self, words : &Vec<&String>) -> FileVector { let mut fv = FileVector::new(); for word in words { let i = self.set_and_get(word); if !fv.contains_key(&i) { fv.insert(i, 1); } else { let c : u64 = *fv.get(&i).unwrap(); fv.insert(i, c + 1); } } fv } }