diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/index.rs | 141 |
1 files changed, 60 insertions, 81 deletions
diff --git a/src/index.rs b/src/index.rs index 78df3a1..cfb612a 100644 --- a/src/index.rs +++ b/src/index.rs @@ -107,8 +107,12 @@ impl Index { match nof_handle { Some(t) => { - nof = t.join().unwrap(); - nof_handle = None; + if t.is_finished() { + nof = t.join().unwrap(); + nof_handle = None; + } else { + nof_handle = Some(t); + } } None => { // Make sure that we only push a update @@ -160,105 +164,80 @@ impl Index { } } - fn merge_two(first : Index, second : Index) -> thread::JoinHandle<Self> { - thread::spawn(move || { - let (a, b) = if first.filecache.len() < second.filecache.len() { - (second, first) - } else { - (first, second) - }; - let mut filecache = a.filecache.clone(); - let mut dictionary = Dictionary::default(); - - thread::scope(|s| { - let mut a_hash : HashSet<FileCache> = HashSet::new(); - let mut diff : Vec<FileCache> = Vec::new(); + fn merge_two(a : Index, b : Index) -> Self { + let mut filecache = a.filecache.clone(); + let mut dictionary = Dictionary::default(); - let converter_handle = s.spawn(|| { - let mut b_id_to_word : HashMap<u64, String> = HashMap::new(); - - for (value, id) in b.dictionary.iter() { - b_id_to_word.insert(id.clone(), value.clone()); - } - b_id_to_word - }); + thread::scope(|s| { + let mut a_hash : HashSet<FileCache> = HashSet::new(); + let mut diff : Vec<FileCache> = Vec::new(); - let dict_handle = s.spawn(|| { - let mut dict = a.dictionary.clone(); - for (word, _) in b.dictionary.iter() { - dict.set(word.clone()); - } - dict - }); + let converter_handle = s.spawn(|| { + let mut b_id_to_word : HashMap<u64, String> = HashMap::new(); - for file in a.filecache.iter() { - a_hash.insert(file.clone()); + for (value, id) in b.dictionary.iter() { + b_id_to_word.insert(id.clone(), value.clone()); } + b_id_to_word + }); - for file in b.filecache.iter() { - if !a_hash.contains(file) { - diff.push(file.clone()); - } + let dict_handle = s.spawn(|| { + let mut dict = a.dictionary.clone(); + for (word, _) in b.dictionary.iter() { + dict.set(word.clone()); } + dict + }); - let b_id_to_word = converter_handle.join().unwrap(); - dictionary = dict_handle.join().unwrap(); - - for file in diff { - let mut words = Vec::new(); - - for (word_id, i) in file.vector.iter() { - for _ in 0..*i { - words.push(b_id_to_word.get(word_id).unwrap().clone()); - } - } + for file in a.filecache.iter() { + a_hash.insert(file.clone()); + } - filecache.push(FileCache { - path : file.path.clone(), - vector: dictionary.vectorize_word_list(words) - }); + for file in b.filecache.iter() { + if !a_hash.contains(file) { + diff.push(file.clone()); } - }); - - Self { - dictionary, - filecache } - }) - } - pub fn merge(indexes : Vec<Index>, callback : impl Fn(u8)) -> Self { - let mut idxs : Vec<Index> = indexes.clone(); - let max = (idxs.len() as f32).log2().ceil() as u32; - let mut i = 0 as u32; + let b_id_to_word = converter_handle.join().unwrap(); + dictionary = dict_handle.join().unwrap(); - while idxs.len() > 1 { - callback((i * 100 / max) as u8); - i += 1; - let mut idxs_handle = Vec::new(); - let mut processed = Vec::new(); + for file in diff { + let mut words = Vec::new(); - for chunk in idxs.chunks(2) { - if chunk.len() == 2 { - let a = chunk[0].clone(); - let b = chunk[1].clone(); - idxs_handle.push(Index::merge_two(a, b)); - } else { - for idx in chunk.iter() { - processed.push(idx.clone()) + for (word_id, i) in file.vector.iter() { + for _ in 0..*i { + words.push(b_id_to_word.get(word_id).unwrap().clone()); } } - } - for idx_handle in idxs_handle { - let idx : Index = idx_handle.join().unwrap(); - processed.push(idx) + filecache.push(FileCache { + path : file.path.clone(), + vector: dictionary.vectorize_word_list(words) + }); } + }); - idxs = processed; + Self { + dictionary, + filecache } + } + + pub fn merge(mut indexes : Vec<Index>, callback : impl Fn(u8)) -> Self { + let max = indexes.len(); + let mut i = 0 as usize; - idxs.get(0).unwrap().clone() + indexes.sort_by(|a, b| a.filecache.len().cmp(&b.filecache.len())); + let mut merged_index = indexes.pop().unwrap(); + + for index in indexes { + callback((i * 100 / max) as u8); + i += 1; + merged_index = Index::merge_two(merged_index, index); + } + callback(100); + merged_index } pub fn search(&self, search_args : Vec<String>) -> Vec<SearchResult> { |