diff options
Diffstat (limited to 'src/main.rs')
| -rw-r--r-- | src/main.rs | 127 |
1 files changed, 32 insertions, 95 deletions
diff --git a/src/main.rs b/src/main.rs index 2e49fd4..261008b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,110 +6,47 @@ pub mod filecache; pub mod searchresult; pub mod filecounter; pub mod extractors; +pub mod index; -use vector::FileVector; -use dictionary::Dictionary; -use filecache::FileCache; -use searchresult::SearchResult; -use filecounter::filecount; -use std::fs::File; -use std::io::{Write, BufReader, BufRead}; -use walkdir::*; -use std::thread; -use std::option::Option::None; +use index::Index; +use std::io::*; +use std::env; -fn generate_index(input_path : &str, index_path : &str) { - let mut index_file = File::create(index_path).unwrap(); - let mut dict = Dictionary::new(); - let mut nof = 0; - let mut counter = 0; - - thread::scope(|s| { - let mut nof_handle : Option<_> = Some(s.spawn(|| filecount(input_path))); - - for entry in WalkDir::new(input_path).into_iter().filter_map(|e| e.ok()) { - counter += 1; - if entry.path().is_file() { - let content : String = text::extract_text(entry.path().to_str().unwrap()); - - if content.is_empty() { - continue - } - - let words : Vec<String> = splitter::split_to_words(content); - - for word in words.iter() { - dict.set(word.clone()); - } - - let fv = dict.vectorize_word_list(words.clone()); - writeln!(index_file, "{}, {}", entry.path().to_str().unwrap().replace(",", "\0"), fv.to_hex()).ok(); +fn main() { + let args: Vec<_> = env::args().collect(); + if args.len() > 1 { + let cmd = args.get(1).unwrap(); - match nof_handle { - Some(t) => { - nof = t.join().unwrap(); - nof_handle = None; - } - None => { - eprint!("\r\x1b[2K{} of {} files indexed ({}%)", counter, nof, (counter * 100) / nof); - std::io::stdout().flush().ok(); - } - } + if cmd == "-g" { + if args.len() != 4 { + eprintln!("{} -g <input> <indexfile>", args.get(0).unwrap()); + return; } - } - - eprintln!("\r\x1b[2Kall files indexed (100%)"); - - let dict_list : Vec<String> = dict.to_list(); - writeln!(index_file, "#{}", dict_list.join(",")).ok(); - }); -} - -fn search(index_path : &str, search_args : Vec<&str>) { - let index_file = File::open(index_path).expect("could not open index file"); - let reader = BufReader::new(index_file); - let mut filecaches : Vec<FileCache> = Vec::new(); - let mut dict = Dictionary::new(); - - for line in reader.lines() { - let l = line.unwrap(); - if l.starts_with("#") { - dict = Dictionary::from_line(&l.strip_prefix("#").unwrap()); - } else { - filecaches.push(FileCache::from_line(l)); - } - } - - let mut v : FileVector = FileVector::new(); - - for arg in search_args { - v.insert(dict.get(arg.to_string()), 1); - } + let input = args.get(2).unwrap(); + let file = args.get(3).unwrap(); + let _ = Index::generate(input, file, |counter, nof| { + eprint!("\r\x1b[2K{} of {} files indexed ({}%)", counter, nof, (counter * 100) / nof); + std::io::stdout().flush().ok(); + }); + } else if cmd == "-s" { + if args.len() < 4 { + eprintln!("{} -s <indexfile> ...", args.get(0).unwrap()); + return; + } - let mut results : Vec<SearchResult> = Vec::new(); + let file = args.get(2).unwrap().clone(); + let v = args.get(3..(args.len())).unwrap(); + let search = v.join(" "); + let searchvec = splitter::split_to_words(search); - for filecache in filecaches.iter() { - let mut r = SearchResult { priority : 0, path : filecache.path.clone() }; - r.priority = vector::scalar_product(&v, &filecache.vector); - if r.priority > 0 { - results.push(r); + let idx = Index::from_file(file.as_str()); + let results = idx.search(searchvec); + for result in results { + println!("{}", result.path); + } } - } - results.sort_by(|a, b| b.priority.cmp(&a.priority)); - for result in results.iter() { - println!("{}", result.path); } - - println!("{} results", results.len()) - -} - -fn main() { - println!("Generating Index..."); - generate_index("/home/n8", "index.idxs"); - println!("Searching..."); - search("index.idxs", vec!["welt"]); } |