cryptatools_core/cryptanalysis/
plain_text_detector.rs

1use lingua::{Language, LanguageDetector, LanguageDetectorBuilder};
2
/// Language-based detector used to tell plain text apart from cipher text.
///
/// The detector holds no state of its own: every call builds whatever it needs
/// from its arguments. Because its methods take `self` by value, the type is
/// `Copy` so that a single detector value can be reused for several calls.
#[derive(Debug, Default, Clone, Copy)]
pub struct PlainTextDetector {}
5
6impl PlainTextDetector {
7    pub fn new() -> Self {
8        PlainTextDetector {
9        }
10    }
11
12     ///  Detect if plain text if the text correspond to a set of specified know languages.
13     ///
14     ///  The presumed plain text is passed as argument. The `minimum_confidence_value` variable is 1 if we are sure that any word exactly correspond to the corresponding language.
15     ///  The `minimum_confidence_value` is 0 if we are sure it does not correspond to the corresponding language at all.
16     ///  Return true if it is plain text. Else return false.
17     /// 
18     ///  ```
19     ///  use lingua::Language::*;
20     ///  use lingua::Language;
21     ///  use cryptatools_core::cryptanalysis::plain_text_detector::PlainTextDetector;
22     ///  let mut ptd: PlainTextDetector = PlainTextDetector::new();
23     ///  let text: String = String::from("The ennemies will attack at midnight!");
24     ///  let is_plain_text = ptd.is_plain_text(text, vec![], 0.0);
25     ///  assert_eq!(is_plain_text, true);
26     ///  ```
27     /// 
28     ///  ```
29     ///  use lingua::Language::*;
30     ///  use lingua::Language;
31     ///  use cryptatools_core::cryptanalysis::plain_text_detector::PlainTextDetector;
32     ///  let mut ptd: PlainTextDetector = PlainTextDetector::new();
33     ///  let text: String = String::from("d0n0mIn0thing");
34     ///  let is_plain_text = ptd.is_plain_text(text, vec![lingua::Language::English, lingua::Language::French], 8.0);
35     ///  assert_eq!(is_plain_text, false);
36     ///  ```
37     pub fn is_plain_text(self, plain_or_cipher_text: String, languages: Vec<lingua::Language>, minimum_confidence_value: f64) -> bool {
38        let languages_confidence_values = self.catch_confidence_values(plain_or_cipher_text, languages);
39
40        let strongest_language = languages_confidence_values.unwrap().into_iter().max_by(|a, b| a.1.total_cmp(&b.1));
41        if strongest_language.is_none() != true {
42            let most_probably_detect_language_confidence_value: f64 = strongest_language.unwrap().1;
43            if most_probably_detect_language_confidence_value >= minimum_confidence_value {
44                return true;
45            } else {
46                return false;
47            }
48        } else {
49            return false;
50        }
51    }
52
53
54     ///  For each `languages` set, return a tuple with confidence value.
55     ///
56     ///  The confidence value is a value attributed to a text and a language.
57     ///  More the text corresponds to the corresponding language, more the confidence value will be hight.
58     /// 
59     ///  ```
60     ///  use lingua::Language::*;
61     ///  use lingua::Language;
62     ///  use cryptatools_core::cryptanalysis::plain_text_detector::PlainTextDetector;
63     ///  let mut ptd: PlainTextDetector = PlainTextDetector::new();
64     ///  let text: String = String::from("The ennemies will attack at midnight!");
65     ///  let is_plain_text = ptd.is_plain_text(text, vec![lingua::Language::English, lingua::Language::French], 0.0);
66     ///  assert_eq!(is_plain_text, true);
67     ///  ```
68    pub fn catch_confidence_values(self, plain_or_cipher_text: String, languages: Vec<lingua::Language>) -> Option<Vec<(Language, f64)>> {
69        let detector: LanguageDetector = match languages.len() {
70            0 => LanguageDetectorBuilder::from_all_languages().build(),
71            _ => LanguageDetectorBuilder::from_languages(languages.as_slice()).build(),
72        };
73
74        let detected_languages: Vec<(Language, f64)> = detector.compute_language_confidence_values(plain_or_cipher_text);
75    
76        Some(detected_languages)
77    }
78
79     ///  Detect the language used in a plain text using the confidence value algorithm.
80     /// 
81     ///  
82     /// 
83     ///  ```
84     ///  use lingua::Language::*;
85     ///  use lingua::Language;
86     ///  use cryptatools_core::cryptanalysis::plain_text_detector::PlainTextDetector;
87     ///  let mut ptd: PlainTextDetector = PlainTextDetector::new();
88     ///  let text: String = String::from("languages are awesome");
89     ///  let detected_language: Option<Language> = ptd.detect_language(text, vec![]);
90     ///  assert_eq!(detected_language, Some(English));
91     ///  ```
92     /// 
93     /// 
94     ///  ```
95     ///  use lingua::Language::*;
96     ///  use lingua::Language;
97     ///  use cryptatools_core::cryptanalysis::plain_text_detector::PlainTextDetector;
98     ///  let mut ptd: PlainTextDetector = PlainTextDetector::new();
99     ///  let text: String = String::from("languages are awesome");
100     ///  let detected_language: Option<Language> = ptd.detect_language(text, vec![lingua::Language::English, lingua::Language::French]);
101     ///  assert_eq!(detected_language, Some(English));
102     ///  ```
103
104    pub fn detect_language(self, plain_or_cipher_text: String, languages: Vec<lingua::Language>) -> Option<Language> {
105        let most_probably_detected_language = self.catch_confidence_values(plain_or_cipher_text, languages).unwrap().into_iter().max_by(|a, b| a.1.total_cmp(&b.1)).unwrap().0;
106        
107        Some(most_probably_detected_language)
108    }
109
110
111}