diff options
| -rw-r--r-- | src/dictionary.rs | 4 | ||||
| -rw-r--r-- | src/extractors/mod.rs | 2 | ||||
| -rw-r--r-- | src/extractors/pdf.rs | 5 | ||||
| -rw-r--r-- | src/filecache.rs | 1 | ||||
| -rw-r--r-- | src/filecounter.rs | 2 | ||||
| -rw-r--r-- | src/index.rs | 2 | ||||
| -rw-r--r-- | src/searchresult.rs | 3 | ||||
| -rw-r--r-- | src/splitter.rs | 2 | ||||
| -rw-r--r-- | src/text/mod.rs | 3 | ||||
| -rw-r--r-- | src/vector.rs | 4 |
10 files changed, 27 insertions, 1 deletion
diff --git a/src/dictionary.rs b/src/dictionary.rs index e0a561a..94f377d 100644 --- a/src/dictionary.rs +++ b/src/dictionary.rs @@ -1,6 +1,10 @@ use std::collections::HashMap; use crate::vector::FileVector; +/// The dictionary is used to cache the word ids. +/// It also provides a function to convert it to +/// a vector and generate a FileVector from a word list +/// with the current dictionary. pub struct Dictionary { last_index : usize, data : HashMap<String, u64>, diff --git a/src/extractors/mod.rs b/src/extractors/mod.rs index 0f137f1..cd76a11 100644 --- a/src/extractors/mod.rs +++ b/src/extractors/mod.rs @@ -1 +1,3 @@ +/// This module provides extractors which are not available directly as a crate. + pub mod pdf; diff --git a/src/extractors/pdf.rs b/src/extractors/pdf.rs index b7f65d5..8dfe82f 100644 --- a/src/extractors/pdf.rs +++ b/src/extractors/pdf.rs @@ -1,3 +1,4 @@ +// // MIT License // // Copyright (c) 2016 Junfeng Liu @@ -19,6 +20,10 @@ // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. +// +// +// For more information see `https://github.com/J-F-Liu/lopdf/blob/master/examples/extract_text.rs` +// use std::collections::BTreeMap; use std::fmt::Debug; diff --git a/src/filecache.rs b/src/filecache.rs index 287cea0..7faf439 100644 --- a/src/filecache.rs +++ b/src/filecache.rs @@ -1,5 +1,6 @@ use crate::vector::FileVector; +/// Represents one file which was indexed. pub struct FileCache { pub vector : FileVector, pub path : String, diff --git a/src/filecounter.rs b/src/filecounter.rs index 1ed58e3..3532fa3 100644 --- a/src/filecounter.rs +++ b/src/filecounter.rs @@ -1,5 +1,7 @@ use walkdir::*; +/// Function to count the number of entries in a directory. +/// It is needed to report progress. 
pub fn filecount(path : &str) -> u64 { let mut counter : u64 = 0; for _ in WalkDir::new(path).into_iter().filter_map(|e| e.ok()) { diff --git a/src/index.rs b/src/index.rs index 8c358d3..616682d 100644 --- a/src/index.rs +++ b/src/index.rs @@ -12,6 +12,8 @@ use crate::text; use crate::splitter; use crate::vector; +/// Represents an Index which is either generated +/// or read from a file. pub struct Index { dictionary : Dictionary, filecache : Vec<FileCache>, diff --git a/src/searchresult.rs b/src/searchresult.rs index 6a0dd30..cc882ff 100644 --- a/src/searchresult.rs +++ b/src/searchresult.rs @@ -1,4 +1,5 @@ - +/// This struct is returned by the index::Index::search +/// function. pub struct SearchResult { pub priority : u64, pub path : String diff --git a/src/splitter.rs b/src/splitter.rs index 64e659f..fbb2b6a 100644 --- a/src/splitter.rs +++ b/src/splitter.rs @@ -1,5 +1,7 @@ use std::vec::Vec; +/// Splits a line into its words used to create a +/// dictionary and search arguments. pub fn split_to_words(data : String) -> Vec<String> { let mut v : Vec<String> = data .to_lowercase() diff --git a/src/text/mod.rs b/src/text/mod.rs index dd969af..c3fd8f6 100644 --- a/src/text/mod.rs +++ b/src/text/mod.rs @@ -36,6 +36,9 @@ lazy_static! { } +/// Extract text from files if there is an extractor. +/// If there is an error like `permission denied` or there +/// is no extractor this function returns an empty string. pub fn extract_text(path : &str) -> String { let p = Path::new(&path); let extenstion = p.extension().unwrap_or_else(|| OsStr::new("")).to_str().unwrap(); diff --git a/src/vector.rs b/src/vector.rs index 56d1817..0779e07 100644 --- a/src/vector.rs +++ b/src/vector.rs @@ -1,6 +1,10 @@ use std::collections::HashMap; use std::ops::{Deref, DerefMut}; +/// Represents the content of a cached file. +/// It is stored as a HashMap, because we do not +/// have to store the zeros. With that we save a lot +/// of storage. pub struct FileVector { data : HashMap<u64, u64> } |