use std::collections::HashMap; use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; use std::ops::{Deref, DerefMut}; use serde::{Deserialize, Serialize}; /// Represents the content of a cached file. /// It is stored as a HashMap, because we do not /// have to store the zeros. With that we save a lot /// of storage. #[derive(Default, Clone, Debug, Deserialize, Serialize)] pub struct FileVector { data : HashMap } impl Deref for FileVector { type Target = HashMap; fn deref(&self) -> &Self::Target { &self.data } } impl DerefMut for FileVector { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.data } } impl FileVector { pub fn new() -> Self { Self { data : HashMap::new() } } pub fn from_words(words: Vec) -> Self { let mut data = HashMap::new(); for word in words { let mut hasher = DefaultHasher::new(); word.hash(&mut hasher); let k = hasher.finish(); match data.entry(k) { std::collections::hash_map::Entry::Occupied(mut e) => { e.insert(e.get() + 1); } std::collections::hash_map::Entry::Vacant(e) => { e.insert(1); } } } Self { data } } } pub fn scalar_product(a : &FileVector, b : &FileVector) -> u64 { let mut c = 0; for (i, x) in a.iter() { c += x * (b.get(i).unwrap_or(&0)); } c } pub fn match_vector(query : &FileVector, v : &FileVector) -> u64 { let mut c = 0; for (i, x) in query.iter() { let s = x * (v.get(i).unwrap_or(&0)); if s == 0 { return 0 } else { c += s; } } c }