1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
use std::collections::HashMap;
use crate::vector::FileVector;
/// Maps each known word to a numeric id.
///
/// Besides plain lookups, a `Dictionary` can be flattened into a list of
/// words ordered by id, and can turn a list of words into a `FileVector`
/// holding the number of occurrences of each id.
#[derive(Debug, Clone)]
pub struct Dictionary {
    /// Highest id handed out so far; 0 for a freshly created dictionary.
    last_index: usize,
    /// Lookup table from word to id.
    data: HashMap<String, u64>,
}
impl Default for Dictionary {
fn default() -> Self {
Self::new()
}
}
impl Dictionary {
    /// Creates an empty dictionary.
    ///
    /// `last_index` starts at 0 and [`Dictionary::set`] increments it before
    /// assigning, so the first word added to a fresh dictionary gets id 1.
    pub fn new() -> Self {
        Self {
            last_index: 0,
            data: HashMap::new(),
        }
    }

    /// Builds a dictionary from a comma-separated list of words.
    ///
    /// Every field receives its zero-based position as id; empty fields are
    /// kept as the word `""` (so an empty `line` maps `""` to 0). When a word
    /// occurs more than once, the id of its last occurrence is the one stored.
    pub fn from_line(line: &str) -> Self {
        let mut data: HashMap<String, u64> = HashMap::new();
        // `split` always yields at least one field, so `last_index` ends up
        // as the position of the final field.
        let mut last_index = 0;
        for (index, word) in line.split(',').enumerate() {
            data.insert(word.to_string(), index as u64);
            last_index = index;
        }
        Self { last_index, data }
    }

    /// Registers `name` under the next free id (`last_index + 1`).
    ///
    /// Words that are already present keep their id and nothing changes.
    pub fn set(&mut self, name: String) {
        if let std::collections::hash_map::Entry::Vacant(slot) = self.data.entry(name) {
            self.last_index += 1;
            slot.insert(self.last_index as u64);
        }
    }

    /// Returns the id stored for `name`, or `None` when the word is unknown.
    pub fn get(&self, name: String) -> Option<&u64> {
        self.data.get(&name)
    }

    /// Returns a reference to the underlying word-to-id map.
    ///
    /// Despite the name this is the map itself, not an iterator; it can be
    /// used directly in a `for (word, id) in dict.iter()` loop.
    pub fn iter(&self) -> &HashMap<String, u64> {
        &self.data
    }

    /// Returns the words ordered by id.
    ///
    /// The result has `last_index + 1` entries and position `i` holds the
    /// word whose id is `i`. Ids that no word maps to are left as empty
    /// strings (for example position 0 of a dictionary filled through
    /// [`Dictionary::new`] and [`Dictionary::set`]).
    pub fn to_list(&self) -> Vec<String> {
        let mut words = vec![String::new(); self.last_index + 1];
        for (word, &id) in &self.data {
            words[id as usize] = word.clone();
        }
        words
    }

    /// Counts how often each word of `words` occurs, keyed by word id.
    ///
    /// # Panics
    ///
    /// Panics if a word is not present in the dictionary; the panic message
    /// names the offending word.
    pub fn vectorize_word_list(&self, words: Vec<String>) -> FileVector {
        let mut fv = FileVector::new();
        for word in words {
            let id = match self.data.get(&word) {
                Some(id) => *id,
                None => panic!("word {:?} is not in the dictionary", word),
            };
            // One lookup to read the current count, one insert to store it.
            let count = match fv.get(&id) {
                Some(current) => *current + 1,
                None => 1,
            };
            fv.insert(id, count);
        }
        fv
    }
}
|