pub mod vector; pub mod dictionary; pub mod text; pub mod splitter; pub mod filecache; pub mod searchresult; pub mod filecounter; use vector::FileVector; use dictionary::Dictionary; use filecache::FileCache; use searchresult::SearchResult; use filecounter::filecount; use std::fs::File; use std::io::{Write, BufReader, BufRead}; use walkdir::*; use std::thread; use std::option::Option::None; fn generate_index(input_path : &str, index_path : &str) { let mut index_file = File::create(index_path).unwrap(); let mut dict = Dictionary::new(); let mut nof = 0; let mut counter = 0; thread::scope(|s| { let mut nof_handle : Option<_> = Some(s.spawn(|| filecount(input_path))); for entry in WalkDir::new(input_path).into_iter().filter_map(|e| e.ok()) { if entry.path().is_file() { let content : String = text::extract_text(entry.path().to_str().unwrap()); let words : Vec = splitter::split_to_words(content); for word in words.iter() { dict.set(word.clone()); } let fv = dict.vectorize_word_list(words.clone()); writeln!(index_file, "{}, {}", entry.path().to_str().unwrap().replace(",", "\0"), fv.to_hex()).ok(); counter += 1; match nof_handle { Some(t) => { nof = t.join().unwrap(); nof_handle = None; } None => { print!("\r\x1b[2K{} of {} files indexed ({}%)", counter, nof, (counter * 100) / nof); std::io::stdout().flush().ok(); } } } } println!("\r\x1b[2K{} of {} files indexed ({}%)", counter, nof, (counter * 100) / nof); let dict_list : Vec = dict.to_list(); writeln!(index_file, "#{}", dict_list.join(",")).ok(); }); } fn search(index_path : &str, search_args : Vec<&str>) { let index_file = File::open(index_path).expect("could not open index file"); let reader = BufReader::new(index_file); let mut filecaches : Vec = Vec::new(); let mut dict = Dictionary::new(); for line in reader.lines() { let l = line.unwrap(); if l.starts_with("#") { dict = Dictionary::from_line(&l.strip_prefix("#").unwrap()); } else { filecaches.push(FileCache::from_line(l)); } } let mut v : FileVector = FileVector::new(); for arg in search_args { v.insert(dict.get(arg.to_string()), 1); } let mut results : Vec = Vec::new(); for filecache in filecaches.iter() { let mut r = SearchResult { priority : 0, path : filecache.path.clone() }; r.priority = vector::scalar_product(&v, &filecache.vector); if r.priority > 0 { results.push(r); } } results.sort_by(|a, b| b.priority.cmp(&a.priority)); for result in results.iter() { println!("{}", result.path); } println!("{} results", results.len()) } fn main() { println!("Generating Index..."); generate_index("/home/n8", "index.idxs"); //println!("Searching..."); //search("index.idxs", vec!["one", "difficult", "under", "linux"]); }