// Provenance: src/splitter.rs, introduced by commit 4d577650f737
// ("first sketch of indexer", Nathan Reiner, 2023-07-05).

use std::vec::Vec;

/// Splits `data` into lowercase words with punctuation and digits removed.
///
/// Processing order:
/// 1. The whole input is lowercased.
/// 2. The result is split on Unicode whitespace.
/// 3. Every character found in the strip set
///    (``{}[]#(),".;:?!'%|0123456789/\^``) is removed from each token.
/// 4. Tokens that end up empty (e.g. `"123"` or `"..."`) are dropped.
///
/// Characters outside the strip set, such as `-` and `_`, are kept, so
/// `"foo-bar"` stays a single word.
///
/// # Examples
///
/// ```ignore
/// assert_eq!(split_to_words("Hello, World!".to_string()), vec!["hello", "world"]);
/// ```
pub fn split_to_words(data: String) -> Vec<String> {
    // Characters removed from every token; a raw string avoids escaping `"` and `\`.
    const STRIPPED_CHARS: &str = r#"{}[]#(),".;:?!'%|0123456789/\^"#;

    data.to_lowercase()
        .split_whitespace()
        .map(|token| {
            token
                .chars()
                .filter(|c| !STRIPPED_CHARS.contains(*c))
                .collect::<String>()
        })
        // A token made up solely of stripped characters carries no word.
        .filter(|word| !word.is_empty())
        .collect()
}