aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNathan Reiner <nathan@nathanreiner.xyz>2023-07-06 14:20:14 +0200
committerNathan Reiner <nathan@nathanreiner.xyz>2023-07-06 14:20:14 +0200
commit9b4aa4e9643b0a5b4a554e455eac269a2472b590 (patch)
treecdab839b50a137db2a6df480a0f75f1f1385cb41
parent8c3c8efa428200c67c1a6fa731faee55adf19678 (diff)
add some documentation to structs
-rw-r--r--src/dictionary.rs4
-rw-r--r--src/extractors/mod.rs2
-rw-r--r--src/extractors/pdf.rs5
-rw-r--r--src/filecache.rs1
-rw-r--r--src/filecounter.rs2
-rw-r--r--src/index.rs2
-rw-r--r--src/searchresult.rs3
-rw-r--r--src/splitter.rs2
-rw-r--r--src/text/mod.rs3
-rw-r--r--src/vector.rs4
10 files changed, 27 insertions, 1 deletions
diff --git a/src/dictionary.rs b/src/dictionary.rs
index e0a561a..94f377d 100644
--- a/src/dictionary.rs
+++ b/src/dictionary.rs
@@ -1,6 +1,10 @@
use std::collections::HashMap;
use crate::vector::FileVector;
+/// The dictionary is used to cache word ids.
+/// It also provides a function to convert it to
+/// a vector and generate a FileVector from a word list
+/// with the current dictionary.
pub struct Dictionary {
last_index : usize,
data : HashMap<String, u64>,
diff --git a/src/extractors/mod.rs b/src/extractors/mod.rs
index 0f137f1..cd76a11 100644
--- a/src/extractors/mod.rs
+++ b/src/extractors/mod.rs
@@ -1 +1,3 @@
+//! This module provides extractors which are not available directly as a crate.
+
pub mod pdf;
diff --git a/src/extractors/pdf.rs b/src/extractors/pdf.rs
index b7f65d5..8dfe82f 100644
--- a/src/extractors/pdf.rs
+++ b/src/extractors/pdf.rs
@@ -1,3 +1,4 @@
+//
// MIT License
//
// Copyright (c) 2016 Junfeng Liu
@@ -19,6 +20,10 @@
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
+//
+//
+// For more information see `https://github.com/J-F-Liu/lopdf/blob/master/examples/extract_text.rs`
+//
use std::collections::BTreeMap;
use std::fmt::Debug;
diff --git a/src/filecache.rs b/src/filecache.rs
index 287cea0..7faf439 100644
--- a/src/filecache.rs
+++ b/src/filecache.rs
@@ -1,5 +1,6 @@
use crate::vector::FileVector;
+/// Represents one file which was indexed.
pub struct FileCache {
pub vector : FileVector,
pub path : String,
diff --git a/src/filecounter.rs b/src/filecounter.rs
index 1ed58e3..3532fa3 100644
--- a/src/filecounter.rs
+++ b/src/filecounter.rs
@@ -1,5 +1,7 @@
use walkdir::*;
+/// Function to count the number of entries in a directory.
+/// This is needed to report progress.
pub fn filecount(path : &str) -> u64 {
let mut counter : u64 = 0;
for _ in WalkDir::new(path).into_iter().filter_map(|e| e.ok()) {
diff --git a/src/index.rs b/src/index.rs
index 8c358d3..616682d 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -12,6 +12,8 @@ use crate::text;
use crate::splitter;
use crate::vector;
+/// Represents an Index which is either generated
+/// or read from a file.
pub struct Index {
dictionary : Dictionary,
filecache : Vec<FileCache>,
diff --git a/src/searchresult.rs b/src/searchresult.rs
index 6a0dd30..cc882ff 100644
--- a/src/searchresult.rs
+++ b/src/searchresult.rs
@@ -1,4 +1,5 @@
-
+/// This struct is returned by the index::Index::search
+/// function.
pub struct SearchResult {
pub priority : u64,
pub path : String
diff --git a/src/splitter.rs b/src/splitter.rs
index 64e659f..fbb2b6a 100644
--- a/src/splitter.rs
+++ b/src/splitter.rs
@@ -1,5 +1,7 @@
use std::vec::Vec;
+/// Splits a line into its words, which are used to build
+/// the dictionary and the search arguments.
pub fn split_to_words(data : String) -> Vec<String> {
let mut v : Vec<String> = data
.to_lowercase()
diff --git a/src/text/mod.rs b/src/text/mod.rs
index dd969af..c3fd8f6 100644
--- a/src/text/mod.rs
+++ b/src/text/mod.rs
@@ -36,6 +36,9 @@ lazy_static! {
}
+/// Extract text from files if there is an extractor.
+/// If there is an error like `permission denied`, or there
+/// is no extractor, this function returns an empty string.
pub fn extract_text(path : &str) -> String {
let p = Path::new(&path);
let extenstion = p.extension().unwrap_or_else(|| OsStr::new("")).to_str().unwrap();
diff --git a/src/vector.rs b/src/vector.rs
index 56d1817..0779e07 100644
--- a/src/vector.rs
+++ b/src/vector.rs
@@ -1,6 +1,10 @@
use std::collections::HashMap;
use std::ops::{Deref, DerefMut};
+/// Represents the content of a cached file.
+/// It is stored as a HashMap, because we do not
+/// have to store the zeros. With that we save a lot
+/// of storage.
pub struct FileVector {
data : HashMap<u64, u64>
}