aboutsummaryrefslogtreecommitdiff
path: root/src/vector.rs
diff options
context:
space:
mode:
authorNathan Reiner <nathan@nathanreiner.xyz>2023-07-14 00:22:39 +0200
committerNathan Reiner <nathan@nathanreiner.xyz>2023-07-14 00:22:39 +0200
commit149e0b6ae9871515be21f23b492f5ef7355e2ca4 (patch)
treed5b1bf8281a3a1cf181d5c921a53dfd99fd8b7a9 /src/vector.rs
parent0723ea6b6bb6832b11582eeb8a330d2bdb6077b5 (diff)
make fast using hash instead of dictionary
Diffstat (limited to 'src/vector.rs')
-rw-r--r--src/vector.rs43
1 files changed, 17 insertions, 26 deletions
diff --git a/src/vector.rs b/src/vector.rs
index c058490..fa0a139 100644
--- a/src/vector.rs
+++ b/src/vector.rs
@@ -1,11 +1,14 @@
use std::collections::HashMap;
+use std::collections::hash_map::DefaultHasher;
+use std::hash::{Hash, Hasher};
use std::ops::{Deref, DerefMut};
+use serde::{Deserialize, Serialize};
/// Represents the content of a cached file.
/// The data is kept in a HashMap so that zero entries
/// do not have to be stored at all, which saves a lot
/// of storage.
-#[derive(Clone, Debug)]
+#[derive(Default, Clone, Debug, Deserialize, Serialize)]
pub struct FileVector {
data : HashMap<u64, u64>
}
@@ -23,42 +26,30 @@ impl DerefMut for FileVector {
}
}
-impl Default for FileVector {
- fn default() -> Self {
- Self::new()
- }
-}
-
impl FileVector {
pub fn new() -> Self {
Self { data : HashMap::new() }
}
- pub fn from_string(hex : &str) -> Self {
- let mut data : HashMap<u64, u64> = HashMap::new();
- let data_chunks : Vec<&str> = hex.split(' ').collect();
+ pub fn from_words(words: Vec<String>) -> Self {
+ let mut data = HashMap::new();
- for chunk in data_chunks {
- if !chunk.is_empty() {
- let n : Vec<&str> = chunk.split(';').collect();
- let i : u64 = u64::from_str_radix(n[0], 16).expect("could not extract index");
- let v : u64 = u64::from_str_radix(n[1], 16).expect("could not extract value");
- data.insert(i, v);
+ for word in words {
+ let mut hasher = DefaultHasher::new();
+ word.hash(&mut hasher);
+ let k = hasher.finish();
+ match data.entry(k) {
+ std::collections::hash_map::Entry::Occupied(mut e) => {
+ e.insert(e.get() + 1);
+ }
+ std::collections::hash_map::Entry::Vacant(e) => {
+ e.insert(1);
+ }
}
}
Self { data }
}
-
- pub fn stringify(&self) -> String {
- let mut hex = String::new();
-
- for (i, v) in self.data.iter() {
- hex += &format!("{:x};{:x} ", *i, *v);
- }
-
- hex.trim().to_string()
- }
}
pub fn scalar_product(a : &FileVector, b : &FileVector) -> u64 {