aboutsummaryrefslogtreecommitdiff
path: root/src/splitter.rs
diff options
context:
space:
mode:
authorNathan Reiner <nathan@nathanreiner.xyz>2023-07-05 23:07:26 +0200
committerNathan Reiner <nathan@nathanreiner.xyz>2023-07-05 23:07:26 +0200
commit4d577650f737daaeb477bbbd5ae2bad4f1121c38 (patch)
treeac973541e0a2d7751af4ece5f7f639e739f81fcc /src/splitter.rs
first sketch of indexer
Diffstat (limited to 'src/splitter.rs')
-rw-r--r--src/splitter.rs16
1 files changed, 16 insertions, 0 deletions
diff --git a/src/splitter.rs b/src/splitter.rs
new file mode 100644
index 0000000..64e659f
--- /dev/null
+++ b/src/splitter.rs
@@ -0,0 +1,16 @@
+use std::vec::Vec;
+
/// Breaks `data` into a list of cleaned, lowercase words.
///
/// The input is lowercased and split on whitespace. Every character found in
/// `STRIPPED` (brackets, a fixed set of punctuation, ASCII digits, both
/// slashes and `^`) is removed from each piece, and pieces that end up empty
/// are left out of the result. Characters not in `STRIPPED` (for example `-`,
/// `_` or `+`) are kept as-is.
///
/// Returns the remaining words in their original order.
pub fn split_to_words(data: String) -> Vec<String> {
    // Characters removed from every token before it is accepted as a word.
    const STRIPPED: &str = r#"{}[]#(),".;:?!'%|0123456789/\^"#;

    let lowered = data.to_lowercase();
    let mut words = Vec::new();

    for token in lowered.split_whitespace() {
        let cleaned: String = token
            .chars()
            .filter(|ch| !STRIPPED.contains(*ch))
            .collect();

        // A token made up only of stripped characters contributes nothing.
        if !cleaned.is_empty() {
            words.push(cleaned);
        }
    }

    words
}