diff options
| author | Nathan Reiner <nathan@nathanreiner.xyz> | 2023-07-05 23:07:26 +0200 |
|---|---|---|
| committer | Nathan Reiner <nathan@nathanreiner.xyz> | 2023-07-05 23:07:26 +0200 |
| commit | 4d577650f737daaeb477bbbd5ae2bad4f1121c38 (patch) | |
| tree | ac973541e0a2d7751af4ece5f7f639e739f81fcc /src/main.rs | |
first sketch of indexer
Diffstat (limited to 'src/main.rs')
| -rw-r--r-- | src/main.rs | 87 |
1 files changed, 87 insertions, 0 deletions
diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..f0975ad --- /dev/null +++ b/src/main.rs @@ -0,0 +1,87 @@ +pub mod vector; +pub mod dictionary; +pub mod text; +pub mod splitter; +pub mod filecache; +pub mod searchresult; + +use vector::FileVector; +use dictionary::Dictionary; +use filecache::FileCache; +use searchresult::SearchResult; +use std::fs::File; +use std::io::{Write, BufReader, BufRead}; +use walkdir::*; + +fn generate_index(input_path : &str, index_path : &str) { + let mut index_file = File::create(index_path).unwrap(); + let mut dict = Dictionary::new(); + + for entry in WalkDir::new(input_path).into_iter().filter_map(|e| e.ok()) { + if entry.path().is_file() { + let content : String = text::extract_text(entry.path().to_str().unwrap()); + if !content.is_empty() { + let words : Vec<String> = splitter::split_to_words(content); + + for word in words.iter() { + let w = word.clone(); + dict.set(w); + } + + let fv : FileVector = dict.vectorize_word_list(words); + writeln!(index_file, "{}, {}", entry.path().to_str().unwrap().replace(",", "\0"), fv.to_hex()).ok(); + } + } + } + + let dict_list : Vec<String> = dict.to_list(); + writeln!(index_file, "#{}", dict_list.join(",")).ok(); +} + +fn search(index_path : &str, search_args : Vec<&str>) { + let index_file = File::open(index_path).expect("could not open index file"); + let reader = BufReader::new(index_file); + let mut filecaches : Vec<FileCache> = Vec::new(); + let mut dict = Dictionary::new(); + + + for line in reader.lines() { + let l = line.unwrap(); + if l.starts_with("#") { + dict = Dictionary::from_line(&l.strip_prefix("#").unwrap()); + } else { + filecaches.push(FileCache::from_line(l)); + } + } + + let mut v : FileVector = FileVector::new(); + + for arg in search_args { + v.insert(dict.get(arg.to_string()), 1); + } + + let mut results : Vec<SearchResult> = Vec::new(); + + for filecache in filecaches.iter() { + let mut r = SearchResult { priority : 0, path : filecache.path.clone() }; + r.priority = vector::scalar_product(&v, &filecache.vector); + if r.priority > 0 { + results.push(r); + } + } + results.sort_by(|a, b| b.priority.cmp(&a.priority)); + + for result in results.iter() { + println!("{}", result.path); + } + + println!("{} results", results.len()) + +} + +fn main() { + println!("Generating Index..."); + generate_index("/home/n8", "index.idxs"); + println!("Searching..."); + search("index.idxs", vec!["one", "difficult", "under", "linux"]); +} |