about | summary | refs | log | tree | commit | diff
path: root/src/index.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/index.rs')
-rw-r--r-- src/index.rs 133
1 files changed, 133 insertions, 0 deletions
diff --git a/src/index.rs b/src/index.rs
new file mode 100644
index 0000000..7ca31f1
--- /dev/null
+++ b/src/index.rs
@@ -0,0 +1,133 @@
+use std::fs::File;
+use std::io::{Write, BufReader, BufRead};
+use walkdir::*;
+use std::thread;
+use std::option::Option::None;
+use crate::vector::FileVector;
+use crate::dictionary::Dictionary;
+use crate::filecache::FileCache;
+use crate::searchresult::SearchResult;
+use crate::filecounter::filecount;
+use crate::text;
+use crate::splitter;
+use crate::vector;
+
+/// Search index held in memory: a dictionary plus one cached entry per
+/// indexed file.
+pub struct Index {
+    /// Dictionary consulted by `search` to look up each query term.
+    dictionary: Dictionary,
+    /// Cached entries (path and vector) that `search` scores against the query.
+    filecache: Vec<FileCache>,
+}
+
+impl Index {
+    /// Walks `input_path` recursively, indexes every regular file that yields
+    /// text, writes the index to `index_path` and returns it in memory.
+    ///
+    /// Index file layout: one line `<path>, <hex vector>` per indexed file
+    /// (commas inside the path are stored as NUL bytes), followed by a final
+    /// line consisting of `#` and the comma-joined dictionary list.
+    ///
+    /// `callback(processed, total)` reports progress. `total` comes from
+    /// `filecount`, which runs on a separate scoped thread; progress is only
+    /// reported once that total is available. A final `callback(total, total)`
+    /// is always issued.
+    ///
+    /// Files whose path is not valid UTF-8 are skipped instead of aborting
+    /// the whole run.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the index file cannot be created or if the counting thread
+    /// panicked.
+    pub fn generate(input_path: &str, index_path: &str, callback: impl Fn(u64, u64)) -> Self {
+        let mut index_file = File::create(index_path).expect("could not create index file");
+        let mut dict = Dictionary::new();
+        let mut filecache: Vec<FileCache> = Vec::new();
+        let mut nof = 0;
+        let mut counter = 0;
+
+        thread::scope(|s| {
+            // Count the files in the background so indexing can start right away.
+            let mut nof_handle = Some(s.spawn(|| filecount(input_path)));
+
+            for entry in WalkDir::new(input_path)
+                .into_iter()
+                .filter_map(|e| e.ok())
+            {
+                counter += 1;
+
+                let path = entry.path();
+                if !path.is_file() {
+                    continue;
+                }
+
+                // The index format and `extract_text` both work on `&str`, so a
+                // path that is not valid UTF-8 cannot be indexed; skip it.
+                if let Some(path_str) = path.to_str() {
+                    let content: String = text::extract_text(path_str);
+
+                    if !content.is_empty() {
+                        let words: Vec<String> = splitter::split_to_words(content);
+
+                        for word in words.iter() {
+                            dict.set(word.clone());
+                        }
+
+                        // `words` is not needed afterwards, so hand it over
+                        // instead of cloning it.
+                        let fv = dict.vectorize_word_list(words);
+
+                        // Best effort: this function has no error channel, so
+                        // a failed write is ignored.
+                        writeln!(
+                            index_file,
+                            "{}, {}",
+                            path_str.replace(",", "\0"),
+                            fv.to_hex()
+                        ).ok();
+
+                        filecache.push(FileCache {
+                            path: path_str.to_string(),
+                            vector: fv,
+                        });
+                    }
+                }
+
+                // Pick up the total as soon as the counting thread is done,
+                // without blocking the indexing loop on it.
+                let count_ready = nof_handle.as_ref().map_or(false, |h| h.is_finished());
+                if count_ready {
+                    if let Some(handle) = nof_handle.take() {
+                        nof = handle.join().expect("file counting thread panicked");
+                    }
+                }
+
+                // Only report progress once the total is known.
+                if nof_handle.is_none() {
+                    callback(counter, nof);
+                }
+            }
+
+            // The walk may end before the count was collected (for example
+            // when nothing was indexed); join now so the final report carries
+            // the real total instead of 0.
+            if let Some(handle) = nof_handle.take() {
+                nof = handle.join().expect("file counting thread panicked");
+            }
+
+            callback(nof, nof);
+
+            let dict_list: Vec<String> = dict.to_list();
+            writeln!(index_file, "#{}", dict_list.join(",")).ok();
+        });
+
+        Self {
+            dictionary: dict,
+            filecache,
+        }
+    }
+
+    /// Loads an index previously written to `path`.
+    ///
+    /// A line starting with `#` is passed (without the `#`) to
+    /// `Dictionary::from_line`; if several such lines exist the last one
+    /// wins. Every other line is passed to `FileCache::from_line`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the file cannot be opened or a line cannot be read.
+    pub fn from_file(path: &str) -> Self {
+        let index_file = File::open(path).expect("could not open index file");
+        let reader = BufReader::new(index_file);
+        let mut filecache: Vec<FileCache> = Vec::new();
+        let mut dict = Dictionary::new();
+
+        for line in reader.lines() {
+            let l = line.expect("could not read index file");
+            // One `strip_prefix` both tests for and removes the marker.
+            match l.strip_prefix('#') {
+                Some(rest) => dict = Dictionary::from_line(rest),
+                None => filecache.push(FileCache::from_line(l)),
+            }
+        }
+
+        Self {
+            dictionary: dict,
+            filecache,
+        }
+    }
+
+    /// Scores every cached file against `search_args` and returns the files
+    /// with a positive score, highest score first.
+    ///
+    /// Each argument found in the dictionary is inserted into the query
+    /// vector with the value 1; arguments not in the dictionary are ignored.
+    /// A file's score is `vector::scalar_product` of the query vector and the
+    /// file's vector. Files with equal scores keep their cache order.
+    pub fn search(&self, search_args: Vec<String>) -> Vec<SearchResult> {
+        let mut v: FileVector = FileVector::new();
+
+        for arg in search_args {
+            // `arg` is already an owned String, so pass it on without copying.
+            if let Some(value) = self.dictionary.get(arg) {
+                v.insert(*value, 1);
+            }
+        }
+
+        let mut results: Vec<SearchResult> = Vec::new();
+
+        for filecache in self.filecache.iter() {
+            let priority = vector::scalar_product(&v, &filecache.vector);
+            // Clone the path only for files that actually match.
+            if priority > 0 {
+                results.push(SearchResult {
+                    priority,
+                    path: filecache.path.clone(),
+                });
+            }
+        }
+
+        // Stable sort, descending by priority.
+        results.sort_by(|a, b| b.priority.cmp(&a.priority));
+        results
+    }
+}