aboutsummaryrefslogtreecommitdiff
path: root/src/main.rs
blob: 2e49fd4dc54fbc8fc2e5e2c4fd7d29a4be30140b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
pub mod vector;
pub mod dictionary;
pub mod text;
pub mod splitter;
pub mod filecache;
pub mod searchresult;
pub mod filecounter;
pub mod extractors;

use vector::FileVector;
use dictionary::Dictionary;
use filecache::FileCache;
use searchresult::SearchResult;
use filecounter::filecount;
use std::fs::File;
use std::io::{Write, BufReader, BufRead};
use walkdir::*;
use std::thread;
use std::option::Option::None;

/// Builds a search index for the files below `input_path` and writes it to `index_path`.
///
/// Every regular file whose extracted text is non-empty yields one line of the form
/// `<path>, <vector as hex>`. Commas inside the path are replaced by a NUL character
/// (presumably so the `, ` separator stays unambiguous when the line is parsed again).
/// The last line of the index is `#` followed by the comma-joined dictionary.
///
/// Progress is reported on stderr. The total number of files is computed by `filecount`
/// on a scoped background thread; the percentage is only shown once that thread has
/// finished, so indexing never waits for the count.
///
/// Files with a non-UTF-8 path are skipped with a warning, and write errors are reported
/// on stderr instead of being dropped silently.
///
/// # Panics
///
/// Panics if the index file cannot be created or if the counting thread panicked.
fn generate_index(input_path : &str, index_path : &str) {
    let index_file = File::create(index_path).expect("could not create index file");
    // One line is written per indexed file, so buffer the writes.
    let mut index_out = std::io::BufWriter::new(index_file);
    let mut dict = Dictionary::new();
    let mut nof = 0;
    let mut counter = 0;

    thread::scope(|s| {
        let mut nof_handle = Some(s.spawn(|| filecount(input_path)));

        for entry in WalkDir::new(input_path).into_iter().filter_map(|e| e.ok()) {
            // NOTE(review): this counts every walked entry, directories included —
            // confirm that `filecount` counts the same set of entries.
            counter += 1;
            if !entry.path().is_file() {
                continue;
            }

            let path = match entry.path().to_str() {
                Some(p) => p,
                None => {
                    eprintln!("\r\x1b[2Kskipping non-UTF-8 path {}", entry.path().display());
                    continue;
                }
            };

            let content : String = text::extract_text(path);
            if content.is_empty() {
                continue;
            }

            let words : Vec<String> = splitter::split_to_words(content);
            for word in words.iter() {
                dict.set(word.clone());
            }

            let fv = dict.vectorize_word_list(words);
            if let Err(e) = writeln!(index_out, "{}, {}", path.replace(",", "\0"), fv.to_hex()) {
                eprintln!("\r\x1b[2Kcould not write index entry for {}: {}", path, e);
            }

            // Pick up the total without blocking: join only once the counter thread is done.
            if let Some(handle) = nof_handle.take() {
                if handle.is_finished() {
                    nof = handle.join().expect("file counting thread panicked");
                } else {
                    nof_handle = Some(handle);
                }
            }

            // `nof > 0` guards the division below against an empty count.
            if nof_handle.is_none() && nof > 0 {
                eprint!("\r\x1b[2K{} of {} files indexed ({}%)", counter, nof, (counter * 100) / nof);
                std::io::stderr().flush().ok();
            }
        }

        eprintln!("\r\x1b[2Kall files indexed (100%)");

        let dict_list : Vec<String> = dict.to_list();
        if let Err(e) = writeln!(index_out, "#{}", dict_list.join(",")) {
            eprintln!("could not write dictionary to index: {}", e);
        }
        // Flush explicitly: dropping a `BufWriter` discards any error from the final write.
        if let Err(e) = index_out.flush() {
            eprintln!("could not flush index file: {}", e);
        }
    });
}

/// Searches the index at `index_path` for `search_args` and prints the matching paths.
///
/// Index lines starting with `#` are parsed as the dictionary; every other line is parsed
/// as a `FileCache`. The query is a `FileVector` with the value 1 inserted at the key that
/// the dictionary returns for each search term. A file matches when the scalar product of
/// the query and its vector is greater than zero. Matches are printed to stdout ordered by
/// descending scalar product, followed by a line with the number of results.
///
/// # Panics
///
/// Panics if the index file cannot be opened or a line cannot be read from it.
fn search(index_path : &str, search_args : Vec<&str>) {
    let reader = BufReader::new(File::open(index_path).expect("could not open index file"));
    let mut dict = Dictionary::new();
    let mut filecaches : Vec<FileCache> = Vec::new();

    for line in reader.lines() {
        let l = line.unwrap();
        match l.strip_prefix("#") {
            Some(dict_line) => dict = Dictionary::from_line(dict_line),
            None => filecaches.push(FileCache::from_line(l)),
        }
    }

    // Build the query vector from the search terms.
    let mut query : FileVector = FileVector::new();
    for term in search_args {
        query.insert(dict.get(term.to_string()), 1);
    }

    // Score every indexed file and keep only those with a positive score.
    let mut results : Vec<SearchResult> = filecaches
        .iter()
        .map(|fc| SearchResult {
            path : fc.path.clone(),
            priority : vector::scalar_product(&query, &fc.vector),
        })
        .filter(|hit| hit.priority > 0)
        .collect();

    // Stable sort, highest priority first.
    results.sort_by_key(|hit| std::cmp::Reverse(hit.priority));

    for hit in &results {
        println!("{}", hit.path);
    }

    println!("{} results", results.len());
}

/// Program entry point: generates the index, then runs a search against it.
///
/// Usage: `<program> [INPUT_DIR [INDEX_FILE [TERM...]]]`
///
/// Every argument is optional. Missing arguments fall back to the previously hard-coded
/// values: `/home/n8` as input directory, `index.idxs` as index file and the single search
/// term `welt`.
fn main() {
    let mut args = std::env::args().skip(1);
    let input_path = args.next().unwrap_or_else(|| String::from("/home/n8"));
    let index_path = args.next().unwrap_or_else(|| String::from("index.idxs"));

    // All remaining arguments are search terms.
    let terms : Vec<String> = args.collect();
    let search_args : Vec<&str> = if terms.is_empty() {
        vec!["welt"]
    } else {
        terms.iter().map(String::as_str).collect()
    };

    println!("Generating Index...");
    generate_index(&input_path, &index_path);
    println!("Searching...");
    search(&index_path, search_args);
}