1 files changed, 170 insertions, 67 deletions
diff --git a/src/index.rs b/src/index.rs
index 27bb56b..78df3a1 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -1,6 +1,7 @@
 use std::collections::{HashSet, HashMap};
 use std::fs::File;
 use std::io::{Write, BufReader, BufRead};
+use std::sync::mpsc::{channel, Sender};
 use walkdir::*;
 use std::thread;
 use std::option::Option::None;
@@ -27,6 +28,14 @@ impl Default for Index {
     }
 }
 
+#[derive(Clone, Debug, Default, Copy)]
+pub enum GenState {
+    #[default]
+    Fetching,
+    Parsing,
+    Merging
+}
+
 impl Index {
     pub fn empty() -> Self {
         Self {
@@ -35,58 +44,99 @@ impl Index {
         }
     }
 
-    pub fn generate(input_path : &str, callback : impl Fn(u64, u64)) -> Self {
-        let mut dict = Dictionary::new();
-        let mut filecache : Vec<FileCache> = Vec::new();
+    pub fn generate(input_path : &str, callback : impl Fn(GenState, u8)) -> Self {
         let mut nof = 0;
         let mut counter = 0;
+        let mut crawler_handles = Vec::new();
+        let num_threads = thread::available_parallelism().unwrap().get();
+        let mut tx_vec : Vec<Sender<String>> = Vec::new();
 
         thread::scope(|s| {
             let mut nof_handle : Option<_> = Some(s.spawn(|| filecount(input_path)));
 
-            for entry in WalkDir::new(input_path)
-                .into_iter()
-                .filter_map(|e| e.ok()) {
-                counter += 1;
-                if entry.path().is_file() {
-                    let content : String = text::extract_text(entry.path().to_str().unwrap());
+            for _ in 0..num_threads {
+                let (tx, rx) = channel();
+                tx_vec.push(tx);
+                crawler_handles.push(thread::spawn(move || {
+                    let mut dict = Dictionary::new();
+                    let mut filecache : Vec<FileCache> = Vec::new();
 
-                    if content.is_empty() {
-                        continue
-                    }
+                    loop {
+                        let path = rx.recv().unwrap();
+                        if path.is_empty() {
+                            return Self {
+                                dictionary : dict,
+                                filecache
+                            }
+                        }
 
-                    let words : Vec<String> = splitter::split_to_words(content);
+                        let content : String = text::extract_text(path.as_str());
 
-                    for word in words.iter() {
-                        dict.set(word.clone());
-                    }
+                        if content.is_empty() {
+                            continue;
+                        }
 
-                    let fv = dict.vectorize_word_list(words.clone());
-                    filecache.push(FileCache {
-                        path : entry.path().to_str().unwrap().to_string(),
-                        vector : fv
-                    });
+                        let words : Vec<String> = splitter::split_to_words(content);
 
+                        for word in words.iter() {
+                            dict.set(word.clone());
+                        }
 
-                }
-                match nof_handle {
-                    Some(t) => {
-                        nof = t.join().unwrap();
-                        nof_handle = None;
+                        let fv = dict.vectorize_word_list(words.clone());
+                        filecache.push(FileCache {
+                            path,
+                            vector : fv
+                        });
                     }
-                    None => {
-                        callback(counter, nof);
+                }));
+            }
+
+            let mut next_crawler = 0;
+            let mut last_p = u64::MAX;
+
+            for entry in WalkDir::new(input_path)
+                .into_iter()
+                .filter_map(|e| e.ok()) {
+                counter += 1;
+                if entry.path().is_file() {
+                    tx_vec[next_crawler].send(entry.path().to_str().unwrap().to_string()).ok();
+                    next_crawler += 1;
+                    if next_crawler == num_threads {
+                        next_crawler = 0;
+                    }
+
+                    match nof_handle {
+                        Some(t) => {
+                            nof = t.join().unwrap();
+                            nof_handle = None;
+                        }
+                        None => {
+                            // Make sure that we only push an update
+                            // if there is a visual change to the number
+                            // because updating the screen takes a lot
+                            // of time.
+                            let p = counter * 100 / nof;
+                            if p != last_p {
+                                callback(GenState::Fetching, p as u8);
+                                last_p = p;
+                            }
+                        }
                     }
                 }
             }
-
-            callback(nof, nof);
         });
 
-        Self {
-            dictionary : dict,
-            filecache
+        let mut indexes = Vec::new();
+        let mut i = 0;
+
+        for handle in crawler_handles {
+            callback(GenState::Parsing, (i * 100 / num_threads) as u8);
+            tx_vec[i].send(String::new()).ok();
+            indexes.push(handle.join().unwrap());
+            i += 1;
         }
+
+        Index::merge(indexes, |p| { callback(GenState::Merging, p) })
     }
 
     pub fn from_file(path : String) -> Self {
@@ -95,7 +145,6 @@ impl Index {
         let mut filecache : Vec<FileCache> = Vec::new();
         let mut dict = Dictionary::new();
 
-
         for line in reader.lines() {
             let l = line.unwrap();
             if l.starts_with('#') {
@@ -111,51 +160,105 @@ impl Index {
         }
     }
 
-    pub fn merge(a : Index, b : Index) -> Self {
-        let mut a_hash : HashSet<FileCache> = HashSet::new();
-        let mut diff : Vec<FileCache> = Vec::new();
-        let mut dict = a.dictionary.clone();
-        let mut filecache = a.filecache.clone();
+    fn merge_two(first : Index, second : Index) -> thread::JoinHandle<Self> {
+        thread::spawn(move || {
+            let (a, b) = if first.filecache.len() < second.filecache.len() {
+                (second, first)
+            } else {
+                (first, second)
+            };
+            let mut filecache = a.filecache.clone();
+            let mut dictionary = Dictionary::default();
 
-        for file in a.filecache.iter() {
-            a_hash.insert(file.clone());
-        }
+            thread::scope(|s| {
+                let mut a_hash : HashSet<FileCache> = HashSet::new();
+                let mut diff : Vec<FileCache> = Vec::new();
 
-        for file in b.filecache.iter() {
-            if !a_hash.contains(file) {
-                diff.push(file.clone());
-            }
-        }
+                let converter_handle = s.spawn(|| {
+                    let mut b_id_to_word : HashMap<u64, String> = HashMap::new();
 
-        for (word, _) in b.dictionary.iter() {
-            dict.set(word.clone());
-        }
+                    for (value, id) in b.dictionary.iter() {
+                        b_id_to_word.insert(id.clone(), value.clone());
+                    }
+                    b_id_to_word
+                });
 
-        let mut b_id_to_word : HashMap<u64, String> = HashMap::new();
+                let dict_handle = s.spawn(|| {
+                    let mut dict = a.dictionary.clone();
+                    for (word, _) in b.dictionary.iter() {
+                        dict.set(word.clone());
+                    }
+                    dict
+                });
 
-        for (value, id) in b.dictionary.iter() {
-            b_id_to_word.insert(*id, value.clone());
-        }
+                for file in a.filecache.iter() {
+                    a_hash.insert(file.clone());
+                }
+
+                for file in b.filecache.iter() {
+                    if !a_hash.contains(file) {
+                        diff.push(file.clone());
+                    }
+                }
+
+                let b_id_to_word = converter_handle.join().unwrap();
+                dictionary = dict_handle.join().unwrap();
+
+                for file in diff {
+                    let mut words = Vec::new();
+
+                    for (word_id, i) in file.vector.iter() {
+                        for _ in 0..*i {
+                            words.push(b_id_to_word.get(word_id).unwrap().clone());
+                        }
+                    }
 
-        for file in diff {
-            let mut words = Vec::new();
+                    filecache.push(FileCache {
+                        path : file.path.clone(),
+                        vector: dictionary.vectorize_word_list(words)
+                    });
+                }
+            });
+
+            Self {
+                dictionary,
+                filecache
+            }
+        })
+    }
+
+    pub fn merge(indexes : Vec<Index>, callback : impl Fn(u8)) -> Self {
+        let mut idxs : Vec<Index> = indexes.clone();
+        let max = (idxs.len() as f32).log2().ceil() as u32;
+        let mut i = 0 as u32;
 
-            for (word_id, i) in file.vector.iter() {
-                for _ in 0..*i {
-                    words.push(b_id_to_word.get(word_id).unwrap().clone());
+        while idxs.len() > 1 {
+            callback((i * 100 / max) as u8);
+            i += 1;
+            let mut idxs_handle = Vec::new();
+            let mut processed = Vec::new();
+
+            for chunk in idxs.chunks(2) {
+                if chunk.len() == 2 {
+                    let a = chunk[0].clone();
+                    let b = chunk[1].clone();
+                    idxs_handle.push(Index::merge_two(a, b));
+                } else {
+                    for idx in chunk.iter() {
+                        processed.push(idx.clone())
+                    }
                 }
             }
 
-            filecache.push(FileCache {
-                path : file.path.clone(),
-                vector: dict.vectorize_word_list(words)
-            });
-        }
+            for idx_handle in idxs_handle {
+                let idx : Index = idx_handle.join().unwrap();
+                processed.push(idx)
+            }
 
-        Self {
-            dictionary: dict,
-            filecache
+            idxs = processed;
         }
+
+        idxs.get(0).unwrap().clone()
     }
 
     pub fn search(&self, search_args : Vec<String>) -> Vec<SearchResult> {