about | summary | refs | log | tree | commit | diff
path: root/src/index.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/index.rs')
-rw-r--r-- src/index.rs 82
1 files changed, 66 insertions, 16 deletions
diff --git a/src/index.rs b/src/index.rs
index d90f403..27bb56b 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -1,3 +1,4 @@
+use std::collections::{HashSet, HashMap};
use std::fs::File;
use std::io::{Write, BufReader, BufRead};
use walkdir::*;
@@ -34,8 +35,7 @@ impl Index {
}
}
- pub fn generate(input_path : &str, index_path : &str, callback : impl Fn(u64, u64)) -> Self {
- let mut index_file = File::create(index_path).unwrap();
+ pub fn generate(input_path : &str, callback : impl Fn(u64, u64)) -> Self {
let mut dict = Dictionary::new();
let mut filecache : Vec<FileCache> = Vec::new();
let mut nof = 0;
@@ -62,16 +62,6 @@ impl Index {
}
let fv = dict.vectorize_word_list(words.clone());
- writeln!(
- index_file,
- "{}, {}",
- entry.path()
- .to_str()
- .unwrap()
- .replace(',', "\0"),
- fv.stringify()
- ).ok();
-
filecache.push(FileCache {
path : entry.path().to_str().unwrap().to_string(),
vector : fv
@@ -91,9 +81,6 @@ impl Index {
}
callback(nof, nof);
-
- let dict_list : Vec<String> = dict.to_list();
- writeln!(index_file, "#{}", dict_list.join(",")).ok();
});
Self {
@@ -102,7 +89,7 @@ impl Index {
}
}
- pub fn from_file(path : &str) -> Self {
+ pub fn from_file(path : String) -> Self {
let index_file = File::open(path).expect("could not open index file");
let reader = BufReader::new(index_file);
let mut filecache : Vec<FileCache> = Vec::new();
@@ -124,6 +111,53 @@ impl Index {
}
}
+ pub fn merge(a : Index, b : Index) -> Self { // Builds a new Index holding all of `a`'s files plus the files of `b` not found in `a`, with the latter re-vectorized against a combined dictionary.
+ let mut a_hash : HashSet<FileCache> = HashSet::new(); // membership set over `a`'s file entries
+ let mut diff : Vec<FileCache> = Vec::new(); // entries of `b` that are not contained in `a_hash`
+ let mut dict = a.dictionary.clone(); // start from a copy of `a`'s dictionary; `b`'s words are passed to `set` below
+ let mut filecache = a.filecache.clone(); // `a`'s entries are carried over unchanged
+
+ for file in a.filecache.iter() {
+ a_hash.insert(file.clone());
+ }
+
+ for file in b.filecache.iter() {
+ if !a_hash.contains(file) { // NOTE(review): uses FileCache's Hash/Eq impl (not visible here); if it includes `vector`, a same-path entry with a different vector counts as new — confirm
+ diff.push(file.clone());
+ }
+ }
+
+ for (word, _) in b.dictionary.iter() {
+ dict.set(word.clone()); // presumably registers the word if it is missing; `set` is defined elsewhere — TODO confirm
+ }
+
+ let mut b_id_to_word : HashMap<u64, String> = HashMap::new(); // reverse lookup: word id in `b`'s dictionary -> word
+
+ for (value, id) in b.dictionary.iter() {
+ b_id_to_word.insert(*id, value.clone());
+ }
+
+ for file in diff {
+ let mut words = Vec::new(); // word list rebuilt from this file's vector
+
+ for (word_id, i) in file.vector.iter() {
+ for _ in 0..*i { // push the word `i` times, `i` being the value stored for this id
+ words.push(b_id_to_word.get(word_id).unwrap().clone()); // panics if the vector references an id that `b`'s dictionary does not contain
+ }
+ }
+
+ filecache.push(FileCache {
+ path : file.path.clone(),
+ vector: dict.vectorize_word_list(words) // vectorized with the combined `dict` instead of `b`'s own dictionary
+ });
+ }
+
+ Self { // result: combined dictionary plus `a`'s entries followed by the re-vectorized `b`-only entries
+ dictionary: dict,
+ filecache
+ }
+ }
+
pub fn search(&self, search_args : Vec<String>) -> Vec<SearchResult> {
let mut v : FileVector = FileVector::new();
@@ -145,4 +179,20 @@ impl Index {
results.sort_by(|a, b| b.priority.cmp(&a.priority));
results
}
+
+ pub fn save(&self, output : String) { // Writes the index to the file at `output`: one line per cached file, then one final line with the dictionary words.
+ let mut index_file = File::create(output).unwrap(); // creates (or truncates) the target file; panics if it cannot be created
+
+ for file in self.filecache.iter() {
+ writeln!( // one "<path>, <vector>" line per file; the write result is discarded via `.ok()`, so write errors are silently ignored
+ index_file,
+ "{}, {}",
+ file.path .replace(',', "\0"), // commas in the path are replaced with NUL, since ", " separates the two fields on the line
+ file.vector.stringify()
+ ).ok();
+ }
+
+ let dict_list : Vec<String> = self.dictionary.to_list();
+ writeln!(index_file, "#{}", dict_list.join(",")).ok(); // last line: '#' followed by the comma-joined dictionary list; write errors ignored here too
+ }
}