aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/filecache.rs20
-rw-r--r--src/gui/mod.rs8
-rw-r--r--src/index.rs82
-rw-r--r--src/main.rs19
4 files changed, 105 insertions, 24 deletions
diff --git a/src/filecache.rs b/src/filecache.rs
index f8d84ec..a352e58 100644
--- a/src/filecache.rs
+++ b/src/filecache.rs
@@ -1,10 +1,26 @@
+use std::hash::{Hasher, Hash};
+
use crate::vector::FileVector;
/// Represents one file which was indexed.
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Default)]
pub struct FileCache {
- pub vector : FileVector,
pub path : String,
+ pub vector : FileVector,
+}
+
+/// Two cache entries compare equal when their `path` is equal;
+/// the `vector` field is not part of the comparison.
+impl PartialEq for FileCache {
+ fn eq(&self, other : &Self) -> bool {
+ self.path == other.path
+ }
+}
+
+// Marker impl: equality is plain `String` equality on `path`, so it is a full
+// equivalence relation. Needed to store `FileCache` in a `HashSet`.
+impl Eq for FileCache { }
+
+/// Hashes only `path`, so the hash agrees with the `PartialEq` impl,
+/// which also looks at `path` alone.
+impl Hash for FileCache {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ self.path.hash(state);
+ }
}
impl FileCache {
diff --git a/src/gui/mod.rs b/src/gui/mod.rs
index a32ceff..5677db1 100644
--- a/src/gui/mod.rs
+++ b/src/gui/mod.rs
@@ -81,7 +81,7 @@ async fn load_file() -> Index {
let file = file.unwrap();
let file = file.to_str();
let file = file.unwrap();
- Index::from_file(file)
+ Index::from_file(file.to_string())
}
async fn generate() -> Index {
@@ -100,11 +100,13 @@ async fn generate() -> Index {
let input = input.unwrap();
let input = input.to_str();
let input = input.unwrap();
- Index::generate(input, file, |counter, nof| {
+ let index = Index::generate(input, |counter, nof| {
let p = ((counter * 100) / nof) as u8;
*GENERATE_PROGRESS.lock().unwrap() = p;
std::io::stdout().flush().ok();
- })
+ });
+ index.save(file.to_string());
+ index
}
async fn generate_update_timer() -> u8 {
diff --git a/src/index.rs b/src/index.rs
index d90f403..27bb56b 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -1,3 +1,4 @@
+use std::collections::{HashSet, HashMap};
use std::fs::File;
use std::io::{Write, BufReader, BufRead};
use walkdir::*;
@@ -34,8 +35,7 @@ impl Index {
}
}
- pub fn generate(input_path : &str, index_path : &str, callback : impl Fn(u64, u64)) -> Self {
- let mut index_file = File::create(index_path).unwrap();
+ pub fn generate(input_path : &str, callback : impl Fn(u64, u64)) -> Self {
let mut dict = Dictionary::new();
let mut filecache : Vec<FileCache> = Vec::new();
let mut nof = 0;
@@ -62,16 +62,6 @@ impl Index {
}
let fv = dict.vectorize_word_list(words.clone());
- writeln!(
- index_file,
- "{}, {}",
- entry.path()
- .to_str()
- .unwrap()
- .replace(',', "\0"),
- fv.stringify()
- ).ok();
-
filecache.push(FileCache {
path : entry.path().to_str().unwrap().to_string(),
vector : fv
@@ -91,9 +81,6 @@ impl Index {
}
callback(nof, nof);
-
- let dict_list : Vec<String> = dict.to_list();
- writeln!(index_file, "#{}", dict_list.join(",")).ok();
});
Self {
@@ -102,7 +89,7 @@ impl Index {
}
}
- pub fn from_file(path : &str) -> Self {
+ pub fn from_file(path : String) -> Self {
let index_file = File::open(path).expect("could not open index file");
let reader = BufReader::new(index_file);
let mut filecache : Vec<FileCache> = Vec::new();
@@ -124,6 +111,53 @@ impl Index {
}
}
+ /// Merges two indexes into one, with `a` taking precedence.
+ ///
+ /// Every file of `a` is kept unchanged. A file of `b` is appended only when
+ /// its path does not occur in `a`. Because word ids of `b` are only meaningful
+ /// relative to `b`'s dictionary, the vector of each appended file is expanded
+ /// back into a word list and re-vectorized against the merged dictionary.
+ ///
+ /// # Panics
+ ///
+ /// Panics if a vector in `b` references a word id that is missing from
+ /// `b`'s dictionary (i.e. `b` is inconsistent).
+ pub fn merge(a : Index, b : Index) -> Self {
+     // `a` is owned, so its parts are moved into the result instead of cloned.
+     let mut dict = a.dictionary;
+     let mut filecache = a.filecache;
+
+     // Files of `b` that `a` does not know yet. Only the path decides
+     // membership, so a set of borrowed paths is enough; it is dropped at the
+     // end of this scope, before `filecache` is extended.
+     let new_files : Vec<&FileCache> = {
+         let known_paths : HashSet<&str> = filecache
+             .iter()
+             .map(|file| file.path.as_str())
+             .collect();
+
+         b.filecache
+             .iter()
+             .filter(|file| !known_paths.contains(file.path.as_str()))
+             .collect()
+     };
+
+     // One pass over `b`'s dictionary: feed every word into the merged
+     // dictionary and build the reverse (id -> word) lookup for `b`.
+     let mut b_id_to_word : HashMap<u64, String> = HashMap::new();
+     for (word, id) in b.dictionary.iter() {
+         dict.set(word.clone());
+         b_id_to_word.insert(*id, word.clone());
+     }
+
+     for file in new_files {
+         let mut words = Vec::new();
+
+         for (word_id, count) in file.vector.iter() {
+             // Look the word up once per id, then repeat it `count` times.
+             let word = b_id_to_word
+                 .get(word_id)
+                 .expect("index vector references a word id missing from its dictionary");
+             words.extend((0..*count).map(|_| word.clone()));
+         }
+
+         filecache.push(FileCache {
+             path : file.path.clone(),
+             vector : dict.vectorize_word_list(words)
+         });
+     }
+
+     Self {
+         dictionary : dict,
+         filecache
+     }
+ }
+
pub fn search(&self, search_args : Vec<String>) -> Vec<SearchResult> {
let mut v : FileVector = FileVector::new();
@@ -145,4 +179,20 @@ impl Index {
results.sort_by(|a, b| b.priority.cmp(&a.priority));
results
}
+
+ /// Writes this index to the file at `output`, replacing any existing file.
+ ///
+ /// Layout: one line per indexed file in the form `<path>, <vector>` (commas
+ /// inside the path are replaced by `\0`), followed by a final line starting
+ /// with `#` that holds the comma-separated dictionary.
+ ///
+ /// Write failures are reported on stderr; the function itself does not fail.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the output file cannot be created.
+ pub fn save(&self, output : String) {
+     let index_file = File::create(output).expect("could not create index file");
+     // Buffer the output: an index has one line per file, and an unbuffered
+     // `File` would issue a write syscall for every single line.
+     let mut writer = std::io::BufWriter::new(index_file);
+
+     if let Err(err) = self.write_index(&mut writer) {
+         eprintln!("could not write index file: {}", err);
+     }
+ }
+
+ /// Serializes all file entries and the dictionary line into `writer` and
+ /// flushes it, stopping at the first I/O error.
+ fn write_index(&self, writer : &mut impl Write) -> std::io::Result<()> {
+     for file in self.filecache.iter() {
+         writeln!(
+             writer,
+             "{}, {}",
+             file.path.replace(',', "\0"),
+             file.vector.stringify()
+         )?;
+     }
+
+     let dict_list : Vec<String> = self.dictionary.to_list();
+     writeln!(writer, "#{}", dict_list.join(","))?;
+
+     // Flush explicitly: dropping a `BufWriter` swallows flush errors.
+     writer.flush()
+ }
}
diff --git a/src/main.rs b/src/main.rs
index fc82739..8cb8466 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -27,10 +27,10 @@ fn main() {
let input = args.get(2).unwrap();
let file = args.get(3).unwrap();
- let _ = Index::generate(input, file, |counter, nof| {
+ let _ = Index::generate(input, |counter, nof| {
eprint!("\r\x1b[2K{} of {} files indexed ({}%)", counter, nof, (counter * 100) / nof);
std::io::stdout().flush().ok();
- });
+ }).save(file.to_string());
} else if cmd == "-s" {
if args.len() < 4 {
eprintln!("{} -s <indexfile> ...", args.get(0).unwrap());
@@ -42,11 +42,24 @@ fn main() {
let search = v.join(" ");
let searchvec = splitter::split_to_words(search);
- let idx = Index::from_file(file.as_str());
+ let idx = Index::from_file(file);
let results = idx.search(searchvec);
for result in results {
println!("{}", result.path);
}
+ } else if cmd == "-m" {
+ if args.len() != 5 {
+ eprintln!("{} -m <index1> <index2> <merged index>", args.get(0).unwrap());
+ return;
+ }
+
+ let index1 = args.get(2).unwrap().clone();
+ let index2 = args.get(3).unwrap().clone();
+ let merged = args.get(4).unwrap().clone();
+ let _ = Index::merge(
+ Index::from_file(index1),
+ Index::from_file(index2)
+ ).save(merged);
}
} else {
let _ = gui::run();