diff options
Diffstat (limited to 'src/index.rs')
| -rw-r--r-- | src/index.rs | 82 |
1 files changed, 66 insertions, 16 deletions
diff --git a/src/index.rs b/src/index.rs index d90f403..27bb56b 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1,3 +1,4 @@ +use std::collections::{HashSet, HashMap}; use std::fs::File; use std::io::{Write, BufReader, BufRead}; use walkdir::*; @@ -34,8 +35,7 @@ impl Index { } } - pub fn generate(input_path : &str, index_path : &str, callback : impl Fn(u64, u64)) -> Self { - let mut index_file = File::create(index_path).unwrap(); + pub fn generate(input_path : &str, callback : impl Fn(u64, u64)) -> Self { let mut dict = Dictionary::new(); let mut filecache : Vec<FileCache> = Vec::new(); let mut nof = 0; @@ -62,16 +62,6 @@ impl Index { } let fv = dict.vectorize_word_list(words.clone()); - writeln!( - index_file, - "{}, {}", - entry.path() - .to_str() - .unwrap() - .replace(',', "\0"), - fv.stringify() - ).ok(); - filecache.push(FileCache { path : entry.path().to_str().unwrap().to_string(), vector : fv @@ -91,9 +81,6 @@ impl Index { } callback(nof, nof); - - let dict_list : Vec<String> = dict.to_list(); - writeln!(index_file, "#{}", dict_list.join(",")).ok(); }); Self { @@ -102,7 +89,7 @@ impl Index { } } - pub fn from_file(path : &str) -> Self { + pub fn from_file(path : String) -> Self { let index_file = File::open(path).expect("could not open index file"); let reader = BufReader::new(index_file); let mut filecache : Vec<FileCache> = Vec::new(); @@ -124,6 +111,53 @@ impl Index { } } + pub fn merge(a : Index, b : Index) -> Self { + let mut a_hash : HashSet<FileCache> = HashSet::new(); + let mut diff : Vec<FileCache> = Vec::new(); + let mut dict = a.dictionary.clone(); + let mut filecache = a.filecache.clone(); + + for file in a.filecache.iter() { + a_hash.insert(file.clone()); + } + + for file in b.filecache.iter() { + if !a_hash.contains(file) { + diff.push(file.clone()); + } + } + + for (word, _) in b.dictionary.iter() { + dict.set(word.clone()); + } + + let mut b_id_to_word : HashMap<u64, String> = HashMap::new(); + + for (value, id) in b.dictionary.iter() { + b_id_to_word.insert(*id, value.clone()); + } + + for file in diff { + let mut words = Vec::new(); + + for (word_id, i) in file.vector.iter() { + for _ in 0..*i { + words.push(b_id_to_word.get(word_id).unwrap().clone()); + } + } + + filecache.push(FileCache { + path : file.path.clone(), + vector: dict.vectorize_word_list(words) + }); + } + + Self { + dictionary: dict, + filecache + } + } + pub fn search(&self, search_args : Vec<String>) -> Vec<SearchResult> { let mut v : FileVector = FileVector::new(); @@ -145,4 +179,20 @@ impl Index { results.sort_by(|a, b| b.priority.cmp(&a.priority)); results } + + pub fn save(&self, output : String) { + let mut index_file = File::create(output).unwrap(); + + for file in self.filecache.iter() { + writeln!( + index_file, + "{}, {}", + file.path .replace(',', "\0"), + file.vector.stringify() + ).ok(); + } + + let dict_list : Vec<String> = self.dictionary.to_list(); + writeln!(index_file, "#{}", dict_list.join(",")).ok(); + } } |