about | summary | refs | log | tree | commit | diff
path: root/src/dictionary.rs
diff options
context:
space:
mode:
author Nathan Reiner <nathan@nathanreiner.xyz> 2023-07-14 00:22:39 +0200
committer Nathan Reiner <nathan@nathanreiner.xyz> 2023-07-14 00:22:39 +0200
commit 149e0b6ae9871515be21f23b492f5ef7355e2ca4 (patch)
tree d5b1bf8281a3a1cf181d5c921a53dfd99fd8b7a9 /src/dictionary.rs
parent 0723ea6b6bb6832b11582eeb8a330d2bdb6077b5 (diff)
make fast using hash instead of dictionary
Diffstat (limited to 'src/dictionary.rs')
-rw-r--r-- src/dictionary.rs 105
1 files changed, 0 insertions, 105 deletions
diff --git a/src/dictionary.rs b/src/dictionary.rs
deleted file mode 100644
index 3e05b91..0000000
--- a/src/dictionary.rs
+++ /dev/null
@@ -1,105 +0,0 @@
-use std::collections::HashMap;
-use crate::vector::FileVector;
-
-/// The dictionary is used to cache to words ids.
-/// It also provides a function to convert it to
-/// a vector and generate a FileVector from a word list
-/// with the current directory.
-#[derive(Clone, Debug)]
-pub struct Dictionary {
- last_index : usize,
- data : HashMap<String, u64>,
-}
-
-impl Default for Dictionary {
- fn default() -> Self {
- Self::new()
- }
-}
-
-impl Dictionary {
- pub fn new() -> Self {
- Self { last_index : 0, data : HashMap::new() }
- }
-
- pub fn from_line(line : &str) -> Self {
- let mut data : HashMap<String, u64> = HashMap::new();
- let mut i : usize = 0;
-
- for word in line.split(',') {
- data.insert(word.to_string(), i as u64);
- i += 1;
- }
-
- Self { last_index : i - 1, data }
- }
-
- pub fn set(&mut self, name : &String) {
- if !self.data.contains_key(name) {
- self.last_index += 1;
- self.data.insert(name.clone(), self.last_index as u64);
- }
- }
-
- pub fn set_and_get(&mut self, name : &String) -> u64 {
- if !self.data.contains_key(name) {
- self.last_index += 1;
- self.data.insert(name.clone(), self.last_index as u64);
- self.last_index as u64
- } else {
- *self.data.get(name).unwrap()
- }
- }
-
- pub fn get(&self, name : &String) -> Option<&u64> {
- self.data.get(name)
- }
-
- pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, u64> {
- self.data.iter()
- }
-
- pub fn to_list(&self) -> Vec<String> {
- let mut v = Vec::with_capacity(self.last_index + 1);
-
- v.resize(self.last_index + 1, "".to_string());
-
- for (word, id) in self.iter() {
- v[(*id) as usize] = word.clone();
- }
-
- v
- }
-
- pub fn vectorize_word_list(&self, words : &Vec<&String>) -> FileVector {
- let mut fv = FileVector::new();
-
- for word in words {
- let i = *self.get(word).unwrap();
- if !fv.contains_key(&i) {
- fv.insert(i, 1);
- } else {
- let c = *fv.get(&i).unwrap();
- fv.insert(i, c + 1);
- }
- }
-
- fv
- }
-
- pub fn insert_words_and_vectorize_word_list(&mut self, words : &Vec<&String>) -> FileVector {
- let mut fv = FileVector::new();
-
- for word in words {
- let i = self.set_and_get(word);
- if !fv.contains_key(&i) {
- fv.insert(i, 1);
- } else {
- let c = *fv.get(&i).unwrap();
- fv.insert(i, c + 1);
- }
- }
-
- fv
- }
-}