aboutsummaryrefslogtreecommitdiff
path: root/src/main.rs
blob: f0975ade28fc7e46d82c69004931ff92cd0e4e7c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
pub mod vector;
pub mod dictionary;
pub mod text;
pub mod splitter;
pub mod filecache;
pub mod searchresult;

use vector::FileVector;
use dictionary::Dictionary;
use filecache::FileCache;
use searchresult::SearchResult;
use std::fs::File;
use std::io::{Write, BufReader, BufRead};
use walkdir::*;

/// Builds the search index for the files below `input_path` and writes it to
/// `index_path`.
///
/// The index file is created (or truncated if it already exists). For every
/// file from which text could be extracted, one line of the form
/// `<path>, <FileVector::to_hex output>` is written. After the walk, a final
/// line `#<word>,<word>,...` holding the dictionary list is appended.
/// Commas in a path are written as NUL bytes, presumably so they cannot be
/// confused with the `, ` field separator.
///
/// Skipped silently: directory entries the walker could not read, entries
/// that are not files, files whose path is not valid UTF-8, and files with
/// empty extracted text.
///
/// A failed write is reported on stderr and stops index generation; the
/// index file is then incomplete.
///
/// NOTE(review): if `index_path` lies below `input_path`, the walk also visits
/// the index file while it is being written — confirm whether it should be
/// excluded.
///
/// # Panics
///
/// Panics if the index file cannot be created.
fn generate_index(input_path : &str, index_path : &str) {
    let index_file = File::create(index_path).expect("could not create index file");
    // One line is written per indexed file, so buffer the output instead of
    // issuing a write call for each of them.
    let mut index_out = std::io::BufWriter::new(index_file);
    let mut dict = Dictionary::new();

    for entry in WalkDir::new(input_path).into_iter().filter_map(|e| e.ok()) {
        if !entry.path().is_file() {
            continue;
        }

        // A path that is not valid UTF-8 cannot be stored in the text index;
        // skip it instead of panicking in the middle of the walk.
        let path = match entry.path().to_str() {
            Some(path) => path,
            None => continue,
        };

        let content : String = text::extract_text(path);
        if content.is_empty() {
            continue;
        }

        let words : Vec<String> = splitter::split_to_words(content);

        // Pass every word to the dictionary before the list is vectorized.
        for word in words.iter() {
            dict.set(word.clone());
        }

        let fv : FileVector = dict.vectorize_word_list(words);
        if let Err(err) = writeln!(index_out, "{}, {}", path.replace(",", "\0"), fv.to_hex()) {
            eprintln!("could not write to index file {}: {}", index_path, err);
            return;
        }
    }

    let dict_list : Vec<String> = dict.to_list();
    if let Err(err) = writeln!(index_out, "#{}", dict_list.join(",")) {
        eprintln!("could not write to index file {}: {}", index_path, err);
        return;
    }

    // Dropping a BufWriter discards flush errors, so flush explicitly.
    if let Err(err) = index_out.flush() {
        eprintln!("could not write to index file {}: {}", index_path, err);
    }
}

/// Runs a query against the index stored at `index_path` and prints the
/// matching paths to stdout.
///
/// Every index line starting with `#` is handed (without the `#`) to
/// `Dictionary::from_line` and replaces the current dictionary; every other
/// line is turned into a `FileCache`. The query vector gets the value `1` at
/// the dictionary entry of each term in `search_args`. A file's priority is
/// the scalar product of the query vector and the file's vector; files with
/// a priority of zero or less are dropped.
///
/// The remaining paths are printed one per line, highest priority first
/// (files with equal priority keep their index order), followed by a
/// `<count> results` line.
///
/// # Panics
///
/// Panics if the index file cannot be opened or a line cannot be read.
fn search(index_path : &str, search_args : Vec<&str>) {
    let reader = BufReader::new(File::open(index_path).expect("could not open index file"));
    let mut filecaches : Vec<FileCache> = Vec::new();
    let mut dict = Dictionary::new();

    for line in reader.lines() {
        let l = line.unwrap();
        match l.strip_prefix("#") {
            // Dictionary line: everything after the marker is the word list.
            Some(rest) => dict = Dictionary::from_line(&rest),
            // Any other line describes one indexed file.
            None => filecaches.push(FileCache::from_line(l)),
        }
    }

    // Build the query vector from the search terms.
    let mut query : FileVector = FileVector::new();
    for term in search_args {
        query.insert(dict.get(term.to_string()), 1);
    }

    // Score every indexed file and keep only those that match at all.
    let mut hits : Vec<SearchResult> = filecaches
        .iter()
        .filter_map(|cache| {
            let priority = vector::scalar_product(&query, &cache.vector);
            if priority > 0 {
                Some(SearchResult { priority, path : cache.path.clone() })
            } else {
                None
            }
        })
        .collect();

    // Stable sort, best match first.
    hits.sort_by_key(|hit| std::cmp::Reverse(hit.priority));

    for hit in &hits {
        println!("{}", hit.path);
    }

    println!("{} results", hits.len())

}

/// Entry point: regenerates the index for a fixed directory and then runs a
/// fixed sample query against it.
fn main() {
    // The same index file is written by the first step and read by the second.
    let index_path = "index.idxs";
    let sample_query = vec!["one", "difficult", "under", "linux"];

    println!("Generating Index...");
    generate_index("/home/n8", index_path);

    println!("Searching...");
    search(index_path, sample_query);
}