about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--	src/index.rs	153
-rw-r--r--	src/main.rs	3
-rw-r--r--	src/searchresult.rs	2
-rw-r--r--	src/vector.rs	67
4 files changed, 105 insertions, 120 deletions
diff --git a/src/index.rs b/src/index.rs
index 8fb34fe..cc86f0c 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -1,16 +1,13 @@
-use std::collections::hash_map::DefaultHasher;
-use std::hash::{Hash, Hasher};
+use std::collections::HashMap;
+use std::hash::Hash;
use std::fs::File;
-use std::io::{BufWriter, Write};
-use std::sync::mpsc::{channel, Sender};
-use std::time::Duration;
-use walkdir::*;
+use std::io::BufWriter;
use std::thread;
-use std::option::Option::None;
-use crate::vector::FileVector;
+use walkdir::*;
+use hash32::Hasher;
+use crate::vector::{FileVector, Indexer, Count};
use crate::filecache::FileCache;
use crate::searchresult::SearchResult;
-use crate::filecounter::filecount;
use crate::text;
use crate::splitter;
use crate::vector;
@@ -44,100 +41,55 @@ impl Index {
}
pub fn generate(input_path : &str, callback : impl Fn(GenState, u8)) -> Self {
- let mut nof = 1;
- let mut counter = 0;
- let mut crawler_handles = Vec::new();
+ let mut nof : usize = 0;
let num_threads = thread::available_parallelism().unwrap().get();
- let mut tx_vec : Vec<Sender<String>> = Vec::new();
- let mut indexes = Vec::new();
-
- thread::scope(|s| {
- let mut nof_handle : Option<_> = Some(s.spawn(|| filecount(input_path)));
- let (status_tx, status_rx) = channel();
+ let mut paths = Vec::new();
+ let (result_tx, result_rx) = std::sync::mpsc::channel();
- for _ in 0..num_threads {
- let (tx, rx) = channel();
- tx_vec.push(tx);
- let status_tx = status_tx.clone();
- crawler_handles.push(thread::spawn(move || {
- let mut filecache : Vec<FileCache> = Vec::new();
-
- loop {
- let path = rx.recv().unwrap();
- if path.is_empty() {
- return Self {
- filecache
- }
- }
+ callback(GenState::Fetching, 0);
+ for entry in WalkDir::new(input_path)
+ .into_iter()
+ .filter_map(|e| e.ok()) {
+ if entry.path().is_file() {
+ nof += 1;
+ paths.push(entry.path().to_str().unwrap().to_string());
+ }
+ }
+ callback(GenState::Fetching, 100);
- let content : String = text::extract_text(path.as_str());
+ let chunks = paths.chunks(paths.len() / num_threads);
+ let mut filecache = Vec::with_capacity(nof);
- let _ = status_tx.send(());
+ thread::scope(|s| {
+ for chunk in chunks {
+ let result_tx = result_tx.clone();
+ s.spawn(move || {
+ for path in chunk {
+ let content : String = text::extract_text(path);
if content.is_empty() {
+ result_tx.send(FileCache {
+ path: "".to_string(),
+ vector : FileVector::default()
+ }).ok();
continue;
}
let words : Vec<String> = splitter::split_to_words(content);
let fv = FileVector::from_words(words);
- filecache.push(FileCache {
- path,
+ result_tx.send(FileCache {
+ path: "".to_string(),
vector : fv
- });
- }
- }));
- }
-
- let mut next_crawler = 0;
- let mut last_p = 0;
-
- for entry in WalkDir::new(input_path)
- .into_iter()
- .filter_map(|e| e.ok()) {
- counter += 1;
- if entry.path().is_file() {
- tx_vec[next_crawler].send(entry.path().to_str().unwrap().to_string()).ok();
- next_crawler += 1;
- if next_crawler == num_threads {
- next_crawler = 0;
- }
-
- match nof_handle {
- Some(t) => {
- if t.is_finished() {
- nof = t.join().unwrap();
- nof_handle = None;
- } else {
- nof_handle = Some(t);
- }
- }
- None => {
- // Make sure that we only push a update
- // if there is a visual change to the number
- // because updating the screen takes a lot
- // of time.
- let p = counter * 100 / nof;
- if p != last_p {
- callback(GenState::Fetching, p as u8);
- last_p = p;
- }
- }
+ }).ok();
}
- }
+ });
}
- let join_handle = s.spawn(|| {
- for (i, handle) in crawler_handles.into_iter().enumerate() {
- tx_vec[i].send(String::new()).ok();
- indexes.push(handle.join().unwrap());
- }
- });
-
-
- let mut i = 0;
+ let mut i : usize = 0;
let mut last_p = 0;
- while !join_handle.is_finished() {
- if status_rx.recv_timeout(Duration::from_millis(20)).is_ok() {
+ while i != nof {
+ if let Ok(result) = result_rx.recv() {
+ filecache.push(result);
i += 1;
let p = i * 100 / nof;
if p != last_p {
@@ -148,11 +100,9 @@ impl Index {
}
callback(GenState::Parsing, 100);
-
- join_handle.join().ok();
});
- Index::merge(indexes, |p| { callback(GenState::Merging, p) })
+ Self { filecache }
}
pub fn from_file(path : &String) -> Self {
@@ -168,32 +118,35 @@ impl Index {
let max = indexes.len();
let mut filecache = Vec::new();
-
for (i, index) in indexes.into_iter().enumerate() {
callback((i * 100 / max) as u8);
filecache.extend(index.filecache);
}
callback(100);
+
Self { filecache }
}
pub fn search(&self, search_args : Vec<String>) -> Vec<SearchResult> {
- let mut v : FileVector = FileVector::new();
- let mut opt : FileVector = FileVector::new();
+ let mut v : HashMap<Indexer, Count> = HashMap::new();
+ let mut opt : HashMap<Indexer, Count> = HashMap::new();
for arg in search_args {
- let mut hasher = DefaultHasher::new();
+ let mut hasher = hash32::FnvHasher::default();
let a = arg.trim_start_matches('+');
a.hash(&mut hasher);
- let value = hasher.finish();
+ let value = hasher.finish32();
if arg.starts_with('+') {
- opt.insert(value, 1);
+ opt.insert(value as Indexer, 1);
} else {
- v.insert(value, 1);
+ v.insert(value as Indexer, 1);
}
}
+ let v = FileVector::from_hashmap(v);
+ let opt = FileVector::from_hashmap(opt);
+
let mut results : Vec<SearchResult> = Vec::new();
for filecache in self.filecache.iter() {
@@ -210,10 +163,8 @@ impl Index {
pub fn save(&self, path: String) {
let index_file = File::create(path).expect("could not open output file");
- let mut file = BufWriter::new(index_file);
-
- file.write_all(&bincode::serialize(&self.filecache).unwrap()).ok();
- file.flush().ok();
+ let file = BufWriter::new(index_file);
+ bincode::serialize_into(file, &self.filecache).ok();
}
pub fn num_files(&self) -> usize {
diff --git a/src/main.rs b/src/main.rs
index 3f18de9..1a8a7c9 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
+#![cfg_attr(not(debug_assertions), windows_subsystem = "windows")] // hide console window on Windows in release
pub mod vector;
pub mod text;
pub mod splitter;
@@ -68,6 +69,6 @@ fn main() {
).save(merged);
}
} else {
- let _ = gui::run();
+ gui::run();
}
}
diff --git a/src/searchresult.rs b/src/searchresult.rs
index 23e8d02..0630791 100644
--- a/src/searchresult.rs
+++ b/src/searchresult.rs
@@ -2,6 +2,6 @@
/// function.
#[derive(Debug, Clone)]
pub struct SearchResult {
- pub priority : u64,
+ pub priority : u32,
pub path : String
}
diff --git a/src/vector.rs b/src/vector.rs
index fa0a139..c85c50a 100644
--- a/src/vector.rs
+++ b/src/vector.rs
@@ -1,8 +1,11 @@
use std::collections::HashMap;
-use std::collections::hash_map::DefaultHasher;
-use std::hash::{Hash, Hasher};
+use std::hash::Hash;
use std::ops::{Deref, DerefMut};
use serde::{Deserialize, Serialize};
+use hash32::Hasher;
+
+pub type Count = u8;
+pub type Indexer = u32;
/// Represents the content of a cached file.
/// It is stored as a HashMap, because we do not
@@ -10,11 +13,18 @@ use serde::{Deserialize, Serialize};
/// of storage.
#[derive(Default, Clone, Debug, Deserialize, Serialize)]
pub struct FileVector {
- data : HashMap<u64, u64>
+ data : Vec<FileVectorEntry>
+}
+
+#[repr(packed)]
+#[derive(Default, Clone, Debug, Deserialize, Serialize)]
+pub struct FileVectorEntry {
+ index: u32,
+ count: Count,
}
impl Deref for FileVector {
- type Target = HashMap<u64, u64>;
+ type Target = Vec<FileVectorEntry>;
fn deref(&self) -> &Self::Target {
&self.data
}
@@ -28,19 +38,32 @@ impl DerefMut for FileVector {
impl FileVector {
pub fn new() -> Self {
- Self { data : HashMap::new() }
+ Self { data : Vec::new() }
+ }
+
+ pub fn to_hashmap(&self) -> HashMap<Indexer, Count> {
+ let mut map = HashMap::new();
+
+ for e in self.data.iter() {
+ map.insert(e.index, e.count);
+ }
+
+ map
}
pub fn from_words(words: Vec<String>) -> Self {
- let mut data = HashMap::new();
+ let mut data : HashMap<Indexer, Count> = HashMap::new();
for word in words {
- let mut hasher = DefaultHasher::new();
+ let mut hasher = hash32::FnvHasher::default();
word.hash(&mut hasher);
- let k = hasher.finish();
- match data.entry(k) {
+ let k = hasher.finish32();
+ match data.entry(k as Indexer) {
std::collections::hash_map::Entry::Occupied(mut e) => {
- e.insert(e.get() + 1);
+ let i = *e.get();
+ if i == Count::MAX {
+ e.insert((i + 1) as Count);
+ }
}
std::collections::hash_map::Entry::Vacant(e) => {
e.insert(1);
@@ -48,26 +71,36 @@ impl FileVector {
}
}
- Self { data }
+ FileVector::from_hashmap(data)
+ }
+
+ pub fn from_hashmap(map : HashMap<Indexer, Count>) -> Self {
+ Self { data : Vec::from_iter(map.iter().map(|e| {
+ FileVectorEntry { index: *e.0, count: *e.1 }
+ }))}
}
}
-pub fn scalar_product(a : &FileVector, b : &FileVector) -> u64 {
- let mut c = 0;
+pub fn scalar_product(a : &FileVector, b : &FileVector) -> u32 {
+ let a = a.to_hashmap();
+ let b = b.to_hashmap();
+ let mut c : u32 = 0;
for (i, x) in a.iter() {
- c += x * (b.get(i).unwrap_or(&0));
+ c += (x * (b.get(i).unwrap_or(&0))) as u32;
}
c
}
-pub fn match_vector(query : &FileVector, v : &FileVector) -> u64 {
- let mut c = 0;
+pub fn match_vector(query : &FileVector, v : &FileVector) -> u32 {
+ let query = query.to_hashmap();
+ let v = v.to_hashmap();
+ let mut c : u32 = 0;
for (i, x) in query.iter() {
let s = x * (v.get(i).unwrap_or(&0));
if s == 0 {
return 0
} else {
- c += s;
+ c += s as u32;
}
}
c