use std::fmt::{Formatter, Debug, Result}; use std::fs::File; use std::io::{Write, BufReader, BufRead}; use walkdir::*; use std::thread; use std::option::Option::None; use crate::vector::FileVector; use crate::dictionary::Dictionary; use crate::filecache::FileCache; use crate::searchresult::SearchResult; use crate::filecounter::filecount; use crate::text; use crate::splitter; use crate::vector; /// Represents a Index which is ether generated /// or read from a file. #[derive(Clone, Debug)] pub struct Index { dictionary : Dictionary, filecache : Vec, } impl Default for Index { fn default() -> Self { Self::empty() } } impl Index { pub fn empty() -> Self { Self { dictionary : Dictionary::new(), filecache : Vec::new() } } pub fn generate(input_path : &str, index_path : &str, callback : impl Fn(u64, u64)) -> Self { let mut index_file = File::create(index_path).unwrap(); let mut dict = Dictionary::new(); let mut filecache : Vec = Vec::new(); let mut nof = 0; let mut counter = 0; thread::scope(|s| { let mut nof_handle : Option<_> = Some(s.spawn(|| filecount(input_path))); for entry in WalkDir::new(input_path) .into_iter() .filter_map(|e| e.ok()) { counter += 1; if entry.path().is_file() { let content : String = text::extract_text(entry.path().to_str().unwrap()); if content.is_empty() { continue } let words : Vec = splitter::split_to_words(content); for word in words.iter() { dict.set(word.clone()); } let fv = dict.vectorize_word_list(words.clone()); writeln!( index_file, "{}, {}", entry.path() .to_str() .unwrap() .replace(',', "\0"), fv.stringify() ).ok(); filecache.push(FileCache { path : entry.path().to_str().unwrap().to_string(), vector : fv }); match nof_handle { Some(t) => { nof = t.join().unwrap(); nof_handle = None; } None => { callback(counter, nof); } } } } callback(nof, nof); let dict_list : Vec = dict.to_list(); writeln!(index_file, "#{}", dict_list.join(",")).ok(); }); Self { dictionary : dict, filecache } } pub fn from_file(path : &str) -> Self { let index_file = File::open(path).expect("could not open index file"); let reader = BufReader::new(index_file); let mut filecache : Vec = Vec::new(); let mut dict = Dictionary::new(); for line in reader.lines() { let l = line.unwrap(); if l.starts_with('#') { dict = Dictionary::from_line(l.strip_prefix('#').unwrap()); } else { filecache.push(FileCache::from_line(l)); } } Self { dictionary : dict, filecache } } pub fn search(&self, search_args : Vec) -> Vec { let mut v : FileVector = FileVector::new(); for arg in search_args { if let Some(value) = self.dictionary.get(arg.to_string()) { v.insert(*value, 1); } } let mut results : Vec = Vec::new(); for filecache in self.filecache.iter() { let mut r = SearchResult { priority : 0, path : filecache.path.clone() }; r.priority = vector::scalar_product(&v, &filecache.vector); if r.priority > 0 { results.push(r); } } results.sort_by(|a, b| b.priority.cmp(&a.priority)); results } }