about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/index.rs 7
-rw-r--r-- src/splitter.rs 5
2 files changed, 7 insertions, 5 deletions
diff --git a/src/index.rs b/src/index.rs
index 741eb7c..36f0ae2 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -49,7 +49,7 @@ impl Index {
let mut nof = 0;
let mut counter = 0;
let mut crawler_handles = Vec::new();
- let num_threads = thread::available_parallelism().unwrap().get();
+ let num_threads = thread::available_parallelism().unwrap().get().min(4);
let mut tx_vec : Vec<Sender<String>> = Vec::new();
let mut indexes = Vec::new();
@@ -93,7 +93,7 @@ impl Index {
}
let mut next_crawler = 0;
- let mut last_p = u64::MAX;
+ let mut last_p = 0;
for entry in WalkDir::new(input_path)
.into_iter()
@@ -151,10 +151,11 @@ impl Index {
}
}
+ callback(GenState::Parsing, 100);
+
join_handle.join().ok();
});
-
Index::merge(indexes.iter().collect(), |p| { callback(GenState::Merging, p) })
}
diff --git a/src/splitter.rs b/src/splitter.rs
index c4015e8..d3a0bdb 100644
--- a/src/splitter.rs
+++ b/src/splitter.rs
@@ -6,13 +6,14 @@ pub fn split_to_words(data : String) -> Vec<String> {
let mut v : Vec<String> = data
.to_lowercase()
.split_whitespace()
- .map(str::to_string).collect();
+ .map(String::from)
+ .collect();
for word in v.iter_mut() {
word.retain(|c| !r#"{}[]#(),".;:?!'%|0123456789/\^"#.contains(c))
}
- v.retain(|str| !str.is_empty() && !str.contains("--"));
+ v.retain(|str| !str.is_empty());
v
}