pub mod vector; pub mod dictionary; pub mod text; pub mod splitter; pub mod filecache; pub mod searchresult; use vector::FileVector; use dictionary::Dictionary; use filecache::FileCache; use searchresult::SearchResult; use std::fs::File; use std::io::{Write, BufReader, BufRead}; use walkdir::*; fn generate_index(input_path : &str, index_path : &str) { let mut index_file = File::create(index_path).unwrap(); let mut dict = Dictionary::new(); for entry in WalkDir::new(input_path).into_iter().filter_map(|e| e.ok()) { if entry.path().is_file() { let content : String = text::extract_text(entry.path().to_str().unwrap()); if !content.is_empty() { let words : Vec = splitter::split_to_words(content); for word in words.iter() { let w = word.clone(); dict.set(w); } let fv : FileVector = dict.vectorize_word_list(words); writeln!(index_file, "{}, {}", entry.path().to_str().unwrap().replace(",", "\0"), fv.to_hex()).ok(); } } } let dict_list : Vec = dict.to_list(); writeln!(index_file, "#{}", dict_list.join(",")).ok(); } fn search(index_path : &str, search_args : Vec<&str>) { let index_file = File::open(index_path).expect("could not open index file"); let reader = BufReader::new(index_file); let mut filecaches : Vec = Vec::new(); let mut dict = Dictionary::new(); for line in reader.lines() { let l = line.unwrap(); if l.starts_with("#") { dict = Dictionary::from_line(&l.strip_prefix("#").unwrap()); } else { filecaches.push(FileCache::from_line(l)); } } let mut v : FileVector = FileVector::new(); for arg in search_args { v.insert(dict.get(arg.to_string()), 1); } let mut results : Vec = Vec::new(); for filecache in filecaches.iter() { let mut r = SearchResult { priority : 0, path : filecache.path.clone() }; r.priority = vector::scalar_product(&v, &filecache.vector); if r.priority > 0 { results.push(r); } } results.sort_by(|a, b| b.priority.cmp(&a.priority)); for result in results.iter() { println!("{}", result.path); } println!("{} results", results.len()) } fn main() { println!("Generating Index..."); generate_index("/home/n8", "index.idxs"); println!("Searching..."); search("index.idxs", vec!["one", "difficult", "under", "linux"]); }