aboutsummaryrefslogtreecommitdiff
path: root/src/dictionary.rs
blob: a8d9b287f4336ad7243f7b791aef053005e6f4c2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
use std::collections::HashMap;
use crate::vector::FileVector;

/// Bidirectional-ish word table: maps each known word to a numeric id and can
/// turn a list of words into a per-id occurrence count.
///
/// Ids are handed out in increasing order. A dictionary created with
/// [`Dictionary::new`] assigns `1` to the first word added through
/// [`Dictionary::set`] (id `0` stays unused), whereas
/// [`Dictionary::from_line`] numbers words by their position starting at `0`.
#[derive(Debug, Clone, Default)]
pub struct Dictionary {
    /// Highest id handed out so far (`0` for an empty dictionary).
    last_index: usize,
    /// Word -> id lookup table.
    data: HashMap<String, u64>,
}

impl Dictionary {
    /// Creates an empty dictionary.
    ///
    /// The first word added with [`Dictionary::set`] receives id `1`.
    pub fn new() -> Self {
        Self::default()
    }

    /// Builds a dictionary from a comma-separated line.
    ///
    /// Every piece between commas is stored verbatim (no trimming) and gets
    /// its zero-based position in the line as id. When the same word occurs
    /// more than once, the later position replaces the earlier one. An empty
    /// line yields a single empty-string word with id `0`.
    pub fn from_line(line: &str) -> Self {
        let mut data: HashMap<String, u64> = HashMap::new();
        let mut last_index = 0;

        // Track the last position directly instead of computing `count - 1`,
        // so there is no subtraction that depends on `split` yielding an item.
        for (position, word) in line.split(',').enumerate() {
            // usize -> u64 is lossless on every platform Rust supports today.
            data.insert(word.to_string(), position as u64);
            last_index = position;
        }

        Self { last_index, data }
    }

    /// Registers `name` under the next free id.
    ///
    /// Does nothing when the word is already known; its existing id is kept.
    pub fn set(&mut self, name: String) {
        // Single hash lookup via the entry API instead of contains_key + insert.
        if let std::collections::hash_map::Entry::Vacant(slot) = self.data.entry(name) {
            self.last_index += 1;
            slot.insert(self.last_index as u64);
        }
    }

    /// Returns the id stored for `name`.
    ///
    /// # Panics
    ///
    /// Panics when `name` is not in the dictionary; the message names the
    /// missing word.
    pub fn get(&self, name: String) -> u64 {
        match self.data.get(&name) {
            Some(&id) => id,
            None => panic!("word {:?} is not in the dictionary", name),
        }
    }

    /// Gives read access to the underlying word -> id map.
    pub fn iter(&self) -> &HashMap<String, u64> {
        &self.data
    }

    /// Returns the words laid out by id.
    ///
    /// The result has `last_index + 1` entries; the word with id `n` sits at
    /// position `n`, and positions no word maps to hold an empty string.
    pub fn to_list(&self) -> Vec<String> {
        let mut words = vec![String::new(); self.last_index + 1];

        for (word, &id) in &self.data {
            // Ids are derived from `usize` values, so this cast does not truncate.
            words[id as usize] = word.clone();
        }

        words
    }

    /// Counts how often each word occurs, keyed by the word's id.
    ///
    /// # Panics
    ///
    /// Panics when any word is not in the dictionary (see [`Dictionary::get`]).
    pub fn vectorize_word_list(&self, words: Vec<String>) -> FileVector {
        let mut fv = FileVector::new();

        for word in words {
            let id = self.get(word);
            // 1 for the first sighting, previous count + 1 otherwise.
            let count = fv.get(&id).map_or(1, |seen| *seen + 1);
            fv.insert(id, count);
        }

        fv
    }
}