aboutsummaryrefslogtreecommitdiff
path: root/src/text
diff options
context:
space:
mode:
Diffstat (limited to 'src/text')
-rw-r--r--src/text/docx.rs9
-rw-r--r--src/text/mod.rs12
-rw-r--r--src/text/odp.rs9
-rw-r--r--src/text/odt.rs9
-rw-r--r--src/text/pdf.rs5
-rw-r--r--src/text/pptx.rs9
-rw-r--r--src/text/xlsx.rs9
7 files changed, 62 insertions, 0 deletions
diff --git a/src/text/docx.rs b/src/text/docx.rs
new file mode 100644
index 0000000..7b4a80e
--- /dev/null
+++ b/src/text/docx.rs
@@ -0,0 +1,9 @@
+use std::io::Read;
+use dotext::{Docx, MsDoc};
+
+/// Extracts the text of the Word document (`.docx`) at `path`.
+///
+/// The file is opened with `dotext::Docx` and everything its reader yields
+/// is collected into the returned `String`.
+///
+/// Extraction is best-effort: if the file cannot be opened, an empty string
+/// is returned instead of panicking. A read error is likewise ignored and
+/// whatever ended up in the buffer is returned.
+pub fn get_text(path : &str) -> String {
+ let mut content = String::new();
+ // Opening is plain I/O on a caller-supplied path, so a failure means
+ // "no text" rather than a reason to abort the whole program.
+ if let Ok(mut file) = Docx::open(path) {
+  let _ = file.read_to_string(&mut content);
+ }
+ content
+}
diff --git a/src/text/mod.rs b/src/text/mod.rs
index 4e1b9a4..dd969af 100644
--- a/src/text/mod.rs
+++ b/src/text/mod.rs
@@ -4,6 +4,12 @@ use std::path::Path;
use std::collections::HashMap;
mod txt;
+mod docx;
+mod xlsx;
+mod pptx;
+mod odt;
+mod odp;
+mod pdf;
fn empty_extractor(_ : &str) -> String {
"".to_string()
@@ -19,6 +25,12 @@ lazy_static! {
static ref EXT: HashMap<&'static str, ExtFn> = {
HashMap::from([
ext!(txt),
+ ext!(docx),
+ ext!(xlsx),
+ ext!(pptx),
+ ext!(odt),
+ ext!(odp),
+ ext!(pdf),
])
};
}
diff --git a/src/text/odp.rs b/src/text/odp.rs
new file mode 100644
index 0000000..eaed196
--- /dev/null
+++ b/src/text/odp.rs
@@ -0,0 +1,9 @@
+use std::io::Read;
+use dotext::{Odp, doc::OpenOfficeDoc};
+
+/// Extracts the text of the OpenDocument presentation (`.odp`) at `path`.
+///
+/// The file is opened with `dotext::Odp` and everything its reader yields
+/// is collected into the returned `String`.
+///
+/// Extraction is best-effort: if the file cannot be opened, an empty string
+/// is returned instead of panicking. A read error is likewise ignored and
+/// whatever ended up in the buffer is returned.
+pub fn get_text(path : &str) -> String {
+ let mut content = String::new();
+ // Opening is plain I/O on a caller-supplied path, so a failure means
+ // "no text" rather than a reason to abort the whole program.
+ if let Ok(mut file) = Odp::open(path) {
+  let _ = file.read_to_string(&mut content);
+ }
+ content
+}
diff --git a/src/text/odt.rs b/src/text/odt.rs
new file mode 100644
index 0000000..10b5342
--- /dev/null
+++ b/src/text/odt.rs
@@ -0,0 +1,9 @@
+use std::io::Read;
+use dotext::{Odt, doc::OpenOfficeDoc};
+
+/// Extracts the text of the OpenDocument text file (`.odt`) at `path`.
+///
+/// The file is opened with `dotext::Odt` and everything its reader yields
+/// is collected into the returned `String`.
+///
+/// Extraction is best-effort: if the file cannot be opened, an empty string
+/// is returned instead of panicking. A read error is likewise ignored and
+/// whatever ended up in the buffer is returned.
+pub fn get_text(path : &str) -> String {
+ let mut content = String::new();
+ // Opening is plain I/O on a caller-supplied path, so a failure means
+ // "no text" rather than a reason to abort the whole program.
+ if let Ok(mut file) = Odt::open(path) {
+  let _ = file.read_to_string(&mut content);
+ }
+ content
+}
diff --git a/src/text/pdf.rs b/src/text/pdf.rs
new file mode 100644
index 0000000..efa441f
--- /dev/null
+++ b/src/text/pdf.rs
@@ -0,0 +1,5 @@
+use crate::extractors::pdf;
+
+/// Returns the text that `pdf::pdf2text` produces for the file at `path`.
+///
+/// Any error reported by `pdf2text` is discarded and an empty string is
+/// returned in its place.
+pub fn get_text(path : &str) -> String {
+ let extracted = pdf::pdf2text(path);
+ // `String::default()` is the empty string, i.e. the same fallback value.
+ extracted.unwrap_or_default()
+}
diff --git a/src/text/pptx.rs b/src/text/pptx.rs
new file mode 100644
index 0000000..7dac77e
--- /dev/null
+++ b/src/text/pptx.rs
@@ -0,0 +1,9 @@
+use std::io::Read;
+use dotext::{Pptx, MsDoc};
+
+/// Extracts the text of the PowerPoint presentation (`.pptx`) at `path`.
+///
+/// The file is opened with `dotext::Pptx` and everything its reader yields
+/// is collected into the returned `String`.
+///
+/// Extraction is best-effort: if the file cannot be opened, an empty string
+/// is returned instead of panicking. A read error is likewise ignored and
+/// whatever ended up in the buffer is returned.
+pub fn get_text(path : &str) -> String {
+ let mut content = String::new();
+ // Opening is plain I/O on a caller-supplied path, so a failure means
+ // "no text" rather than a reason to abort the whole program.
+ if let Ok(mut file) = Pptx::open(path) {
+  let _ = file.read_to_string(&mut content);
+ }
+ content
+}
diff --git a/src/text/xlsx.rs b/src/text/xlsx.rs
new file mode 100644
index 0000000..a438e96
--- /dev/null
+++ b/src/text/xlsx.rs
@@ -0,0 +1,9 @@
+use std::io::Read;
+use dotext::{Xlsx, MsDoc};
+
+/// Extracts the text of the Excel workbook (`.xlsx`) at `path`.
+///
+/// The file is opened with `dotext::Xlsx` and everything its reader yields
+/// is collected into the returned `String`.
+///
+/// Extraction is best-effort: if the file cannot be opened, an empty string
+/// is returned instead of panicking. A read error is likewise ignored and
+/// whatever ended up in the buffer is returned.
+pub fn get_text(path : &str) -> String {
+ let mut content = String::new();
+ // Opening is plain I/O on a caller-supplied path, so a failure means
+ // "no text" rather than a reason to abort the whole program.
+ if let Ok(mut file) = Xlsx::open(path) {
+  let _ = file.read_to_string(&mut content);
+ }
+ content
+}