1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
use std::collections::HashMap;
use crate::vector::FileVector;
/// Cache that maps each known word to its numeric id.
///
/// Besides lookups, the dictionary can be turned back into a word list
/// indexed by id, and it can build a `FileVector` (id -> occurrence count)
/// from a list of words using the ids currently stored in the dictionary.
#[derive(Clone, Debug)]
pub struct Dictionary {
    /// Highest id handed out so far; the next new word receives `last_index + 1`.
    last_index: usize,
    /// Word -> id lookup table.
    data: HashMap<String, u64>,
}
impl Default for Dictionary {
fn default() -> Self {
Self::new()
}
}
// NOTE: the `&String` / `&Vec<&String>` parameter types are intentionally kept
// so the public signatures of this type stay unchanged.
impl Dictionary {
    /// Creates an empty dictionary.
    ///
    /// `last_index` starts at 0 and is incremented *before* an id is handed
    /// out, so the first word added through [`Dictionary::set`] or
    /// [`Dictionary::set_and_get`] receives id 1 and id 0 stays unassigned.
    pub fn new() -> Self {
        Self {
            last_index: 0,
            data: HashMap::new(),
        }
    }

    /// Builds a dictionary from a comma-separated list of words.
    ///
    /// Every field of `line` gets its zero-based position as id. Fields are
    /// stored verbatim (no trimming). If a word occurs more than once, its
    /// last position wins. `last_index` becomes the position of the final
    /// field; `split` always yields at least one field, so an empty `line`
    /// produces a dictionary containing the empty word with id 0.
    pub fn from_line(line: &str) -> Self {
        let mut data: HashMap<String, u64> = HashMap::new();
        let mut last_index = 0;
        for (position, word) in line.split(',').enumerate() {
            data.insert(word.to_string(), position as u64);
            last_index = position;
        }
        Self { last_index, data }
    }

    /// Registers `name` under a fresh id unless it is already known.
    pub fn set(&mut self, name: &String) {
        // Same insertion rule as `set_and_get`; only the id is discarded.
        self.set_and_get(name);
    }

    /// Returns the id of `name`, registering it under a fresh id first if it
    /// is not yet known.
    ///
    /// A fresh id is `last_index + 1`, and `last_index` is advanced to it.
    pub fn set_and_get(&mut self, name: &String) -> u64 {
        if let Some(&id) = self.data.get(name) {
            return id;
        }
        self.last_index += 1;
        let id = self.last_index as u64;
        self.data.insert(name.clone(), id);
        id
    }

    /// Looks up the id of `name`, or `None` if the word is not in the dictionary.
    pub fn get(&self, name: &String) -> Option<&u64> {
        self.data.get(name)
    }

    /// Iterates over all `(word, id)` pairs of the underlying hash map.
    pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, u64> {
        self.data.iter()
    }

    /// Converts the dictionary into a list where the element at index `id`
    /// is the word stored under that id.
    ///
    /// The list has `last_index + 1` entries; ids without a word (for
    /// example id 0 of a dictionary created with [`Dictionary::new`]) are
    /// left as empty strings.
    pub fn to_list(&self) -> Vec<String> {
        let mut words = vec![String::new(); self.last_index + 1];
        for (word, id) in self.iter() {
            words[*id as usize] = word.clone();
        }
        words
    }

    /// Builds a `FileVector` (id -> number of occurrences) from `words`
    /// using the ids already stored in the dictionary.
    ///
    /// # Panics
    ///
    /// Panics if any word of `words` is not in the dictionary. Use
    /// [`Dictionary::insert_words_and_vectorize_word_list`] when unknown
    /// words should be registered instead.
    pub fn vectorize_word_list(&self, words: &Vec<&String>) -> FileVector {
        let mut fv = FileVector::new();
        for word in words {
            let id = *self
                .get(word)
                .unwrap_or_else(|| panic!("word {:?} is not in the dictionary", word));
            Self::count_occurrence(&mut fv, id);
        }
        fv
    }

    /// Builds a `FileVector` (id -> number of occurrences) from `words`,
    /// registering every word that is not yet in the dictionary.
    pub fn insert_words_and_vectorize_word_list(&mut self, words: &Vec<&String>) -> FileVector {
        let mut fv = FileVector::new();
        for word in words {
            let id = self.set_and_get(word);
            Self::count_occurrence(&mut fv, id);
        }
        fv
    }

    /// Increments the occurrence counter of `id` in `fv`, starting at 1 for
    /// an id that has not been seen before.
    fn count_occurrence(fv: &mut FileVector, id: u64) {
        let count = fv.get(&id).map_or(1, |seen| *seen + 1);
        fv.insert(id, count);
    }
}
|