From ae8fad30cd9e76bcba9949095a2cafabb4f1ca8a Mon Sep 17 00:00:00 2001 From: Nathan Reiner Date: Sat, 8 Jul 2023 13:29:37 +0200 Subject: add merge functionality --- src/filecache.rs | 20 ++++++++++++-- src/gui/mod.rs | 8 +++--- src/index.rs | 82 +++++++++++++++++++++++++++++++++++++++++++++----------- src/main.rs | 19 ++++++++++--- 4 files changed, 105 insertions(+), 24 deletions(-) diff --git a/src/filecache.rs b/src/filecache.rs index f8d84ec..a352e58 100644 --- a/src/filecache.rs +++ b/src/filecache.rs @@ -1,10 +1,26 @@ +use std::hash::{Hasher, Hash}; + use crate::vector::FileVector; /// Represents one file which was indexed. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Default)] pub struct FileCache { - pub vector : FileVector, pub path : String, + pub vector : FileVector, +} + +impl PartialEq for FileCache { + fn eq(&self, other : &Self) -> bool { + self.path == other.path + } +} + +impl Eq for FileCache { } + +impl Hash for FileCache { + fn hash(&self, state: &mut H) { + self.path.hash(state); + } } impl FileCache { diff --git a/src/gui/mod.rs b/src/gui/mod.rs index a32ceff..5677db1 100644 --- a/src/gui/mod.rs +++ b/src/gui/mod.rs @@ -81,7 +81,7 @@ async fn load_file() -> Index { let file = file.unwrap(); let file = file.to_str(); let file = file.unwrap(); - Index::from_file(file) + Index::from_file(file.to_string()) } async fn generate() -> Index { @@ -100,11 +100,13 @@ async fn generate() -> Index { let input = input.unwrap(); let input = input.to_str(); let input = input.unwrap(); - Index::generate(input, file, |counter, nof| { + let index = Index::generate(input, |counter, nof| { let p = ((counter * 100) / nof) as u8; *GENERATE_PROGRESS.lock().unwrap() = p; std::io::stdout().flush().ok(); - }) + }); + index.save(file.to_string()); + index } async fn generate_update_timer() -> u8 { diff --git a/src/index.rs b/src/index.rs index d90f403..27bb56b 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1,3 +1,4 @@ +use std::collections::{HashSet, HashMap}; use std::fs::File; use std::io::{Write, BufReader, BufRead}; use walkdir::*; @@ -34,8 +35,7 @@ impl Index { } } - pub fn generate(input_path : &str, index_path : &str, callback : impl Fn(u64, u64)) -> Self { - let mut index_file = File::create(index_path).unwrap(); + pub fn generate(input_path : &str, callback : impl Fn(u64, u64)) -> Self { let mut dict = Dictionary::new(); let mut filecache : Vec = Vec::new(); let mut nof = 0; @@ -62,16 +62,6 @@ impl Index { } let fv = dict.vectorize_word_list(words.clone()); - writeln!( - index_file, - "{}, {}", - entry.path() - .to_str() - .unwrap() - .replace(',', "\0"), - fv.stringify() - ).ok(); - filecache.push(FileCache { path : entry.path().to_str().unwrap().to_string(), vector : fv @@ -91,9 +81,6 @@ impl Index { } callback(nof, nof); - - let dict_list : Vec = dict.to_list(); - writeln!(index_file, "#{}", dict_list.join(",")).ok(); }); Self { @@ -102,7 +89,7 @@ impl Index { } } - pub fn from_file(path : &str) -> Self { + pub fn from_file(path : String) -> Self { let index_file = File::open(path).expect("could not open index file"); let reader = BufReader::new(index_file); let mut filecache : Vec = Vec::new(); @@ -124,6 +111,53 @@ impl Index { } } + pub fn merge(a : Index, b : Index) -> Self { + let mut a_hash : HashSet = HashSet::new(); + let mut diff : Vec = Vec::new(); + let mut dict = a.dictionary.clone(); + let mut filecache = a.filecache.clone(); + + for file in a.filecache.iter() { + a_hash.insert(file.clone()); + } + + for file in b.filecache.iter() { + if !a_hash.contains(file) { + diff.push(file.clone()); + } + } + + for (word, _) in b.dictionary.iter() { + dict.set(word.clone()); + } + + let mut b_id_to_word : HashMap = HashMap::new(); + + for (value, id) in b.dictionary.iter() { + b_id_to_word.insert(*id, value.clone()); + } + + for file in diff { + let mut words = Vec::new(); + + for (word_id, i) in file.vector.iter() { + for _ in 0..*i { + words.push(b_id_to_word.get(word_id).unwrap().clone()); + } + } + + filecache.push(FileCache { + path : file.path.clone(), + vector: dict.vectorize_word_list(words) + }); + } + + Self { + dictionary: dict, + filecache + } + } + pub fn search(&self, search_args : Vec) -> Vec { let mut v : FileVector = FileVector::new(); @@ -145,4 +179,20 @@ impl Index { results.sort_by(|a, b| b.priority.cmp(&a.priority)); results } + + pub fn save(&self, output : String) { + let mut index_file = File::create(output).unwrap(); + + for file in self.filecache.iter() { + writeln!( + index_file, + "{}, {}", + file.path .replace(',', "\0"), + file.vector.stringify() + ).ok(); + } + + let dict_list : Vec = self.dictionary.to_list(); + writeln!(index_file, "#{}", dict_list.join(",")).ok(); + } } diff --git a/src/main.rs b/src/main.rs index fc82739..8cb8466 100644 --- a/src/main.rs +++ b/src/main.rs @@ -27,10 +27,10 @@ fn main() { let input = args.get(2).unwrap(); let file = args.get(3).unwrap(); - let _ = Index::generate(input, file, |counter, nof| { + let _ = Index::generate(input, |counter, nof| { eprint!("\r\x1b[2K{} of {} files indexed ({}%)", counter, nof, (counter * 100) / nof); std::io::stdout().flush().ok(); - }); + }).save(file.to_string()); } else if cmd == "-s" { if args.len() < 4 { eprintln!("{} -s ...", args.get(0).unwrap()); @@ -42,11 +42,24 @@ fn main() { let search = v.join(" "); let searchvec = splitter::split_to_words(search); - let idx = Index::from_file(file.as_str()); + let idx = Index::from_file(file); let results = idx.search(searchvec); for result in results { println!("{}", result.path); } + } else if cmd == "-m" { + if args.len() != 5 { + eprintln!("{} -m ", args.get(0).unwrap()); + return; + } + + let index1 = args.get(2).unwrap().clone(); + let index2 = args.get(3).unwrap().clone(); + let merged = args.get(4).unwrap().clone(); + let _ = Index::merge( + Index::from_file(index1), + Index::from_file(index2) + ).save(merged); } } else { let _ = gui::run(); -- cgit v1.2.3-70-g09d2