cryptatools_core/cryptanalysis/plain_text_detector.rs
1use lingua::{Language, LanguageDetector, LanguageDetectorBuilder};
2
/// Detector that tells natural-language plain text apart from cipher text.
///
/// The type holds no state. Its methods take `self` by value, so the type is
/// `Copy` to let a single detector be reused for several calls. `Default`
/// builds the same empty value as `PlainTextDetector::new()`.
#[derive(Debug, Default, Clone, Copy)]
pub struct PlainTextDetector {}
5
6impl PlainTextDetector {
7 pub fn new() -> Self {
8 PlainTextDetector {
9 }
10 }
11
12 /// Detect if plain text if the text correspond to a set of specified know languages.
13 ///
14 /// The presumed plain text is passed as argument. The `minimum_confidence_value` variable is 1 if we are sure that any word exactly correspond to the corresponding language.
15 /// The `minimum_confidence_value` is 0 if we are sure it does not correspond to the corresponding language at all.
16 /// Return true if it is plain text. Else return false.
17 ///
18 /// ```
19 /// use lingua::Language::*;
20 /// use lingua::Language;
21 /// use cryptatools_core::cryptanalysis::plain_text_detector::PlainTextDetector;
22 /// let mut ptd: PlainTextDetector = PlainTextDetector::new();
23 /// let text: String = String::from("The ennemies will attack at midnight!");
24 /// let is_plain_text = ptd.is_plain_text(text, vec![], 0.0);
25 /// assert_eq!(is_plain_text, true);
26 /// ```
27 ///
28 /// ```
29 /// use lingua::Language::*;
30 /// use lingua::Language;
31 /// use cryptatools_core::cryptanalysis::plain_text_detector::PlainTextDetector;
32 /// let mut ptd: PlainTextDetector = PlainTextDetector::new();
33 /// let text: String = String::from("d0n0mIn0thing");
34 /// let is_plain_text = ptd.is_plain_text(text, vec![lingua::Language::English, lingua::Language::French], 8.0);
35 /// assert_eq!(is_plain_text, false);
36 /// ```
37 pub fn is_plain_text(self, plain_or_cipher_text: String, languages: Vec<lingua::Language>, minimum_confidence_value: f64) -> bool {
38 let languages_confidence_values = self.catch_confidence_values(plain_or_cipher_text, languages);
39
40 let strongest_language = languages_confidence_values.unwrap().into_iter().max_by(|a, b| a.1.total_cmp(&b.1));
41 if strongest_language.is_none() != true {
42 let most_probably_detect_language_confidence_value: f64 = strongest_language.unwrap().1;
43 if most_probably_detect_language_confidence_value >= minimum_confidence_value {
44 return true;
45 } else {
46 return false;
47 }
48 } else {
49 return false;
50 }
51 }
52
53
54 /// For each `languages` set, return a tuple with confidence value.
55 ///
56 /// The confidence value is a value attributed to a text and a language.
57 /// More the text corresponds to the corresponding language, more the confidence value will be hight.
58 ///
59 /// ```
60 /// use lingua::Language::*;
61 /// use lingua::Language;
62 /// use cryptatools_core::cryptanalysis::plain_text_detector::PlainTextDetector;
63 /// let mut ptd: PlainTextDetector = PlainTextDetector::new();
64 /// let text: String = String::from("The ennemies will attack at midnight!");
65 /// let is_plain_text = ptd.is_plain_text(text, vec![lingua::Language::English, lingua::Language::French], 0.0);
66 /// assert_eq!(is_plain_text, true);
67 /// ```
68 pub fn catch_confidence_values(self, plain_or_cipher_text: String, languages: Vec<lingua::Language>) -> Option<Vec<(Language, f64)>> {
69 let detector: LanguageDetector = match languages.len() {
70 0 => LanguageDetectorBuilder::from_all_languages().build(),
71 _ => LanguageDetectorBuilder::from_languages(languages.as_slice()).build(),
72 };
73
74 let detected_languages: Vec<(Language, f64)> = detector.compute_language_confidence_values(plain_or_cipher_text);
75
76 Some(detected_languages)
77 }
78
79 /// Detect the language used in a plain text using the confidence value algorithm.
80 ///
81 ///
82 ///
83 /// ```
84 /// use lingua::Language::*;
85 /// use lingua::Language;
86 /// use cryptatools_core::cryptanalysis::plain_text_detector::PlainTextDetector;
87 /// let mut ptd: PlainTextDetector = PlainTextDetector::new();
88 /// let text: String = String::from("languages are awesome");
89 /// let detected_language: Option<Language> = ptd.detect_language(text, vec![]);
90 /// assert_eq!(detected_language, Some(English));
91 /// ```
92 ///
93 ///
94 /// ```
95 /// use lingua::Language::*;
96 /// use lingua::Language;
97 /// use cryptatools_core::cryptanalysis::plain_text_detector::PlainTextDetector;
98 /// let mut ptd: PlainTextDetector = PlainTextDetector::new();
99 /// let text: String = String::from("languages are awesome");
100 /// let detected_language: Option<Language> = ptd.detect_language(text, vec![lingua::Language::English, lingua::Language::French]);
101 /// assert_eq!(detected_language, Some(English));
102 /// ```
103
104 pub fn detect_language(self, plain_or_cipher_text: String, languages: Vec<lingua::Language>) -> Option<Language> {
105 let most_probably_detected_language = self.catch_confidence_values(plain_or_cipher_text, languages).unwrap().into_iter().max_by(|a, b| a.1.total_cmp(&b.1)).unwrap().0;
106
107 Some(most_probably_detected_language)
108 }
109
110
111}