diff options
| author | Nathan Reiner <nathan@nathanreiner.xyz> | 2023-07-14 00:22:39 +0200 |
|---|---|---|
| committer | Nathan Reiner <nathan@nathanreiner.xyz> | 2023-07-14 00:22:39 +0200 |
| commit | 149e0b6ae9871515be21f23b492f5ef7355e2ca4 (patch) | |
| tree | d5b1bf8281a3a1cf181d5c921a53dfd99fd8b7a9 /src/vector.rs | |
| parent | 0723ea6b6bb6832b11582eeb8a330d2bdb6077b5 (diff) | |
make fast using hash instead of dictionary
Diffstat (limited to 'src/vector.rs')
| -rw-r--r-- | src/vector.rs | 43 |
1 files changed, 17 insertions, 26 deletions
```diff
diff --git a/src/vector.rs b/src/vector.rs
index c058490..fa0a139 100644
--- a/src/vector.rs
+++ b/src/vector.rs
@@ -1,11 +1,14 @@
 use std::collections::HashMap;
+use std::collections::hash_map::DefaultHasher;
+use std::hash::{Hash, Hasher};
 use std::ops::{Deref, DerefMut};
+use serde::{Deserialize, Serialize};
 
 /// Represents the content of a cached file.
 /// It is stored as a HashMap, because we do not
 /// have to store the zeros. With that we save a lot
 /// of storage.
-#[derive(Clone, Debug)]
+#[derive(Default, Clone, Debug, Deserialize, Serialize)]
 pub struct FileVector {
     data : HashMap<u64, u64>
 }
@@ -23,42 +26,30 @@ impl DerefMut for FileVector {
     }
 }
 
-impl Default for FileVector {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
 impl FileVector {
     pub fn new() -> Self {
         Self { data : HashMap::new() }
     }
 
-    pub fn from_string(hex : &str) -> Self {
-        let mut data : HashMap<u64, u64> = HashMap::new();
-        let data_chunks : Vec<&str> = hex.split(' ').collect();
+    pub fn from_words(words: Vec<String>) -> Self {
+        let mut data = HashMap::new();
 
-        for chunk in data_chunks {
-            if !chunk.is_empty() {
-                let n : Vec<&str> = chunk.split(';').collect();
-                let i : u64 = u64::from_str_radix(n[0], 16).expect("could not extract index");
-                let v : u64 = u64::from_str_radix(n[1], 16).expect("could not extract value");
-                data.insert(i, v);
+        for word in words {
+            let mut hasher = DefaultHasher::new();
+            word.hash(&mut hasher);
+            let k = hasher.finish();
+            match data.entry(k) {
+                std::collections::hash_map::Entry::Occupied(mut e) => {
+                    e.insert(e.get() + 1);
+                }
+                std::collections::hash_map::Entry::Vacant(e) => {
+                    e.insert(1);
+                }
             }
         }
 
         Self { data }
     }
-
-    pub fn stringify(&self) -> String {
-        let mut hex = String::new();
-
-        for (i, v) in self.data.iter() {
-            hex += &format!("{:x};{:x} ", *i, *v);
-        }
-
-        hex.trim().to_string()
-    }
 }
 
 pub fn scalar_product(a : &FileVector, b : &FileVector) -> u64 {
```