about summary refs log tree commit diff
path: root/src/index.rs
diff options
context:
space:
mode:
author Nathan Reiner <nathan@nathanreiner.xyz> 2023-07-08 17:29:13 +0200
committer Nathan Reiner <nathan@nathanreiner.xyz> 2023-07-08 17:29:13 +0200
commit 8d6b55ebed2bda4705525004863055e99e72da83 (patch)
tree e96b24a994f2f350d4dc2fabe6404ec20afc9bc3 /src/index.rs
parent cb3ef295105934a1f8aac54e8521b0fe169c0c38 (diff)
optimize merge
Diffstat (limited to 'src/index.rs')
-rw-r--r-- src/index.rs 141
1 files changed, 60 insertions, 81 deletions
diff --git a/src/index.rs b/src/index.rs
index 78df3a1..cfb612a 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -107,8 +107,12 @@ impl Index {
match nof_handle {
Some(t) => {
- nof = t.join().unwrap();
- nof_handle = None;
+ if t.is_finished() {
+ nof = t.join().unwrap();
+ nof_handle = None;
+ } else {
+ nof_handle = Some(t);
+ }
}
None => {
// Make sure that we only push a update
@@ -160,105 +164,80 @@ impl Index {
}
}
- fn merge_two(first : Index, second : Index) -> thread::JoinHandle<Self> {
- thread::spawn(move || {
- let (a, b) = if first.filecache.len() < second.filecache.len() {
- (second, first)
- } else {
- (first, second)
- };
- let mut filecache = a.filecache.clone();
- let mut dictionary = Dictionary::default();
-
- thread::scope(|s| {
- let mut a_hash : HashSet<FileCache> = HashSet::new();
- let mut diff : Vec<FileCache> = Vec::new();
+ fn merge_two(a : Index, b : Index) -> Self {
+ let mut filecache = a.filecache.clone();
+ let mut dictionary = Dictionary::default();
- let converter_handle = s.spawn(|| {
- let mut b_id_to_word : HashMap<u64, String> = HashMap::new();
-
- for (value, id) in b.dictionary.iter() {
- b_id_to_word.insert(id.clone(), value.clone());
- }
- b_id_to_word
- });
+ thread::scope(|s| {
+ let mut a_hash : HashSet<FileCache> = HashSet::new();
+ let mut diff : Vec<FileCache> = Vec::new();
- let dict_handle = s.spawn(|| {
- let mut dict = a.dictionary.clone();
- for (word, _) in b.dictionary.iter() {
- dict.set(word.clone());
- }
- dict
- });
+ let converter_handle = s.spawn(|| {
+ let mut b_id_to_word : HashMap<u64, String> = HashMap::new();
- for file in a.filecache.iter() {
- a_hash.insert(file.clone());
+ for (value, id) in b.dictionary.iter() {
+ b_id_to_word.insert(id.clone(), value.clone());
}
+ b_id_to_word
+ });
- for file in b.filecache.iter() {
- if !a_hash.contains(file) {
- diff.push(file.clone());
- }
+ let dict_handle = s.spawn(|| {
+ let mut dict = a.dictionary.clone();
+ for (word, _) in b.dictionary.iter() {
+ dict.set(word.clone());
}
+ dict
+ });
- let b_id_to_word = converter_handle.join().unwrap();
- dictionary = dict_handle.join().unwrap();
-
- for file in diff {
- let mut words = Vec::new();
-
- for (word_id, i) in file.vector.iter() {
- for _ in 0..*i {
- words.push(b_id_to_word.get(word_id).unwrap().clone());
- }
- }
+ for file in a.filecache.iter() {
+ a_hash.insert(file.clone());
+ }
- filecache.push(FileCache {
- path : file.path.clone(),
- vector: dictionary.vectorize_word_list(words)
- });
+ for file in b.filecache.iter() {
+ if !a_hash.contains(file) {
+ diff.push(file.clone());
}
- });
-
- Self {
- dictionary,
- filecache
}
- })
- }
- pub fn merge(indexes : Vec<Index>, callback : impl Fn(u8)) -> Self {
- let mut idxs : Vec<Index> = indexes.clone();
- let max = (idxs.len() as f32).log2().ceil() as u32;
- let mut i = 0 as u32;
+ let b_id_to_word = converter_handle.join().unwrap();
+ dictionary = dict_handle.join().unwrap();
- while idxs.len() > 1 {
- callback((i * 100 / max) as u8);
- i += 1;
- let mut idxs_handle = Vec::new();
- let mut processed = Vec::new();
+ for file in diff {
+ let mut words = Vec::new();
- for chunk in idxs.chunks(2) {
- if chunk.len() == 2 {
- let a = chunk[0].clone();
- let b = chunk[1].clone();
- idxs_handle.push(Index::merge_two(a, b));
- } else {
- for idx in chunk.iter() {
- processed.push(idx.clone())
+ for (word_id, i) in file.vector.iter() {
+ for _ in 0..*i {
+ words.push(b_id_to_word.get(word_id).unwrap().clone());
}
}
- }
- for idx_handle in idxs_handle {
- let idx : Index = idx_handle.join().unwrap();
- processed.push(idx)
+ filecache.push(FileCache {
+ path : file.path.clone(),
+ vector: dictionary.vectorize_word_list(words)
+ });
}
+ });
- idxs = processed;
+ Self {
+ dictionary,
+ filecache
}
+ }
+
+ pub fn merge(mut indexes : Vec<Index>, callback : impl Fn(u8)) -> Self {
+ let max = indexes.len();
+ let mut i = 0 as usize;
- idxs.get(0).unwrap().clone()
+ indexes.sort_by(|a, b| a.filecache.len().cmp(&b.filecache.len()));
+ let mut merged_index = indexes.pop().unwrap();
+
+ for index in indexes {
+ callback((i * 100 / max) as u8);
+ i += 1;
+ merged_index = Index::merge_two(merged_index, index);
+ }
+ callback(100);
+ merged_index
}
pub fn search(&self, search_args : Vec<String>) -> Vec<SearchResult> {