diff options
| author | Nathan Reiner <nathan@nathanreiner.xyz> | 2023-07-05 23:07:26 +0200 |
|---|---|---|
| committer | Nathan Reiner <nathan@nathanreiner.xyz> | 2023-07-05 23:07:26 +0200 |
| commit | 4d577650f737daaeb477bbbd5ae2bad4f1121c38 (patch) | |
| tree | ac973541e0a2d7751af4ece5f7f639e739f81fcc /src/splitter.rs | |
first sketch of indexer
Diffstat (limited to 'src/splitter.rs')
| -rw-r--r-- | src/splitter.rs | 16 |
1 files changed, 16 insertions, 0 deletions
use std::vec::Vec;

/// Splits `data` into lowercase words with punctuation and digits removed.
///
/// The input is lowercased and split on Unicode whitespace. Every character
/// listed in `STRIPPED_CHARS` (brackets, common punctuation, the ASCII digits,
/// slashes and `^`) is then removed from each token. Tokens that end up empty,
/// for example a token that was a bare number, are left out of the result.
///
/// Characters that are not in the set, such as `-`, `_` or `@`, are kept.
///
/// # Returns
///
/// The remaining words, in the order they appeared in `data`.
pub fn split_to_words(data: String) -> Vec<String> {
    // Raw string literal so that `"` and `\` can be listed without escaping.
    const STRIPPED_CHARS: &str = r#"{}[]#(),".;:?!'%|0123456789/\^"#;

    data.to_lowercase()
        .split_whitespace()
        .map(|token| {
            token
                .chars()
                .filter(|c| !STRIPPED_CHARS.contains(*c))
                .collect::<String>()
        })
        // A token made up solely of stripped characters is not a word.
        .filter(|word| !word.is_empty())
        .collect()
}