diff options
| -rw-r--r-- | src/index.rs | 7 | ||||
| -rw-r--r-- | src/splitter.rs | 5 |
2 files changed, 7 insertions, 5 deletions
diff --git a/src/index.rs b/src/index.rs index 741eb7c..36f0ae2 100644 --- a/src/index.rs +++ b/src/index.rs @@ -49,7 +49,7 @@ impl Index { let mut nof = 0; let mut counter = 0; let mut crawler_handles = Vec::new(); - let num_threads = thread::available_parallelism().unwrap().get(); + let num_threads = thread::available_parallelism().unwrap().get().min(4); let mut tx_vec : Vec<Sender<String>> = Vec::new(); let mut indexes = Vec::new(); @@ -93,7 +93,7 @@ impl Index { } let mut next_crawler = 0; - let mut last_p = u64::MAX; + let mut last_p = 0; for entry in WalkDir::new(input_path) .into_iter() @@ -151,10 +151,11 @@ impl Index { } } + callback(GenState::Parsing, 100); + join_handle.join().ok(); }); - Index::merge(indexes.iter().collect(), |p| { callback(GenState::Merging, p) }) } diff --git a/src/splitter.rs b/src/splitter.rs index c4015e8..d3a0bdb 100644 --- a/src/splitter.rs +++ b/src/splitter.rs @@ -6,13 +6,14 @@ pub fn split_to_words(data : String) -> Vec<String> { let mut v : Vec<String> = data .to_lowercase() .split_whitespace() - .map(str::to_string).collect(); + .map(String::from) + .collect(); for word in v.iter_mut() { word.retain(|c| !r#"{}[]#(),".;:?!'%|0123456789/\^"#.contains(c)) } - v.retain(|str| !str.is_empty() && !str.contains("--")); + v.retain(|str| !str.is_empty()); v } |