about summary refs log tree commit diff
path: root/src/dictionary.rs
diff options
context:
space:
mode:
author Nathan Reiner <nathan@nathanreiner.xyz> 2023-07-05 23:07:26 +0200
committer Nathan Reiner <nathan@nathanreiner.xyz> 2023-07-05 23:07:26 +0200
commit 4d577650f737daaeb477bbbd5ae2bad4f1121c38 (patch)
tree ac973541e0a2d7751af4ece5f7f639e739f81fcc /src/dictionary.rs
first sketch of indexer
Diffstat (limited to 'src/dictionary.rs')
-rw-r--r-- src/dictionary.rs 69
1 files changed, 69 insertions, 0 deletions
diff --git a/src/dictionary.rs b/src/dictionary.rs
new file mode 100644
index 0000000..a8d9b28
--- /dev/null
+++ b/src/dictionary.rs
@@ -0,0 +1,69 @@
+use std::collections::HashMap;
+use crate::vector::FileVector;
+
+/// Vocabulary that assigns a numeric id to every distinct word.
+///
+/// `Debug` is derived for diagnostics, and `Default` yields the same empty
+/// dictionary as `Dictionary::new()` (index 0, no words).
+#[derive(Debug, Default)]
+pub struct Dictionary {
+    /// Highest id handed out so far; 0 in an empty dictionary.
+    last_index: usize,
+    /// Lookup table from word to id.
+    data: HashMap<String, u64>,
+}
+
+
+impl Dictionary {
+    /// Creates an empty dictionary.
+    ///
+    /// `last_index` starts at 0 and [`Dictionary::set`] increments it before
+    /// storing, so the first word added to a fresh dictionary receives id 1.
+    pub fn new() -> Self {
+        Self {
+            last_index: 0,
+            data: HashMap::new(),
+        }
+    }
+
+    /// Builds a dictionary from a comma-separated list of words.
+    ///
+    /// Each word's id is its zero-based position in `line`. If a word occurs
+    /// more than once, the id of its last occurrence is kept. `last_index`
+    /// becomes the position of the final field.
+    pub fn from_line(line: &str) -> Self {
+        let mut data = HashMap::new();
+        // `split` always yields at least one (possibly empty) field, so this
+        // initial value is overwritten on the first iteration.
+        let mut last_index = 0;
+
+        for (position, word) in line.split(',').enumerate() {
+            data.insert(word.to_string(), position as u64);
+            last_index = position;
+        }
+
+        Self { last_index, data }
+    }
+
+    /// Adds `name` to the dictionary if it is not already present.
+    ///
+    /// A new word receives the id `last_index + 1`; a known word keeps its id
+    /// and the dictionary is left unchanged.
+    pub fn set(&mut self, name: String) {
+        // The entry API hashes `name` once instead of `contains_key` + `insert`.
+        if let std::collections::hash_map::Entry::Vacant(slot) = self.data.entry(name) {
+            self.last_index += 1;
+            slot.insert(self.last_index as u64);
+        }
+    }
+
+    /// Returns the id assigned to `name`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `name` is not in the dictionary.
+    pub fn get(&self, name: String) -> u64 {
+        match self.data.get(&name) {
+            Some(&id) => id,
+            None => panic!("word {:?} is not in the dictionary", name),
+        }
+    }
+
+    /// Returns a reference to the underlying word-to-id map.
+    pub fn iter(&self) -> &HashMap<String, u64> {
+        &self.data
+    }
+
+    /// Returns the words ordered by id: element `i` is the word whose id is `i`.
+    ///
+    /// The list has `last_index + 1` entries; ids that have no word (such as
+    /// id 0 in a dictionary built with `new` and `set`) hold an empty string.
+    pub fn to_list(&self) -> Vec<String> {
+        let mut words = vec![String::new(); self.last_index + 1];
+
+        for (word, &id) in &self.data {
+            words[id as usize] = word.clone();
+        }
+
+        words
+    }
+
+    /// Counts how often each word of `words` occurs, keyed by word id.
+    ///
+    /// # Panics
+    ///
+    /// Panics if any word is not in the dictionary (see [`Dictionary::get`]).
+    pub fn vectorize_word_list(&self, words: Vec<String>) -> FileVector {
+        let mut fv = FileVector::new();
+
+        for word in words {
+            let id = self.get(word);
+            // A missing id counts as 0, so a first occurrence is stored as 1.
+            let seen = fv.get(&id).map_or(0, |count| *count);
+            fv.insert(id, seen + 1);
+        }
+
+        fv
+    }
+}