Also add url_parsing to this crate.

author: Andreas Grois <andi@grois.info> 2022-10-09 14:00:38 +0200
committer: Andreas Grois <andi@grois.info> 2022-10-09 14:00:38 +0200
commit: d6d345207530ec3232d937aeee3b0c9255b33129 (patch)
tree: 0645a54d0f695092ed6c0003777d2e212a99e5e5
parent: 5e51b706d54a26470f33d1342f4666d5aab921fc (diff)
4 files changed, 711 insertions, 67 deletions
diff --git a/src/lib.rs b/src/lib.rs
index ad4ae5c..e9181c3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,17 @@
+#![warn(missing_docs)]
+//! Library that should allow quick implementation of tools that are compatible with PasswordMaker Pro.
+//! 
+//! It forms the core of an upcoming PasswordMaker Pro compatible Sailfish OS App (as of yet unnamed).
+//! This library intentionally does not depend on any specific implementation of the cryptographic hashes
+//! it relies on. To see an example of how to integrate with the [Rust Crypto Hashes](https://github.com/RustCrypto/hashes),
+//! see the integration tests.
+//! 
+//! There are two main functions in this library: [`generate_password`][PasswordMaker::generate_password] and
+//! [`parse()`][UrlParsing::parse].
+
+
 mod passwordmaker;
+mod url_parsing;
 use passwordmaker::{PasswordPartParameters, PasswordAssemblyParameters};
 use passwordmaker::leet::LeetReplacementTable;
 use std::error::Error;
@@ -7,8 +20,13 @@ use std::marker::PhantomData;
 
 /// Trait you need to implement for the various hash functions you need to provide.
 /// Currently only a single function, that computes the hash of a string slice, is needed. This may change in a later version.
+/// 
+/// Beware: There is currently no way to put constraints on associated constants in Rust, so Block Size is not exposed.
+/// It's anyhow the same (currently hardcoded) value for all supported algorithms.
 pub trait Hasher {
+    /// The output type of the respective hash function. Typically some form of byte array.
     type Output;
+    /// Function that takes a byte array as input, and generates the cryptographic hash of it as output.
     fn hash(input : &[u8]) -> Self::Output;
 }
 
@@ -25,18 +43,23 @@ pub trait Ripemd160 : Hasher<Output = [u8;20]> {}
 
 /// List of hash functions to use. Trait may change in later versions to include constructors for actual hasher objects.
 pub trait HasherList {
+    /// The type that offers MD4 hashing. See the [`Md4`] trait.
     type MD4 : Md4;
+    /// The type that offers MD5 hashing. See the [`Md5`] trait.
     type MD5 : Md5;
+    /// The type that offers SHA1 hashing. See the [`Sha1`] trait.
     type SHA1 : Sha1;
+    /// The type that offers SHA256 hashing. See the [`Sha256`] trait.
     type SHA256 : Sha256;
+    /// The type that offers Ripemd160 hashing. See the [`Ripemd160`] trait.
     type RIPEMD160 : Ripemd160;
 }
 
 /// A single-use instance of PasswordMaker, created after all inputs are verified to be usable.
 /// Only has one method, which is to generate the password.
 pub struct PasswordMaker<'a, T : HasherList>{
-    data : &'a str, //aka url aka used text
-    key : &'a str, //aka master password
+    data : String, //aka url aka used text
+    key : String, //aka master password
     username : &'a str,
     modifier : &'a str,
     password_part_parameters : PasswordPartParameters<'a>, //contains pre_leet, as this is different for different algorithms
@@ -46,7 +69,41 @@ pub struct PasswordMaker<'a, T : HasherList>{
 }
 
 impl<'a, T : HasherList> PasswordMaker<'a, T>{
-    /// Validates user input and returns a PasswordMaker if the input is valid.
+    /// Generates a password in a way that's (hopefully) compatible with PasswordMaker Pro. Returns an error for unusable input.
+    /// 
+    /// `data` is the string to use, typically a URL or a part of it.
+    /// `key` is the master password.
+    /// `hash_algorithm` is a PasswordMaker Pro algorithm selection.
+    /// `use_leet` details when to use leet, if at all.
+    /// `characters` is the list of output password characters. More precisely, it is the list of grapheme clusters to choose from.
+    /// `username` is the "username" field of PasswordMaker Pro.
+    /// `modifier` is the "modifier" field of PasswordMaker Pro.
+    /// `password_length` is the desired password length to generate.
+    /// `prefix` is the prefix to which the password gets appended. Counts towards `password_length`.
+    /// `suffix` is the suffix appended to the password. Counts towards `password_length`.
+    pub fn generate_password(
+        data : String,
+        key: String,
+        hash_algorithm : HashAlgorithm,
+        use_leet : UseLeetWhenGenerating,
+        characters : &'a str,
+        username : &'a str,
+        modifier: &'a str,
+        password_length : usize,
+        prefix : &'a str,
+        suffix : &'a str,
+    ) -> Result<String, GenerationError>{
+        Ok(
+            Self::validate_input(data, key, hash_algorithm, use_leet, characters, username, modifier, password_length, prefix, suffix)?
+            .generate()
+        )
+    }
+
+
+    /// Validates user input and returns a `PasswordMaker` object if the input is valid.
+    /// Use this if you want to split input validation from actual password computation.
+    /// Otherwise, consider using the `generate_password` function for shorter code.
+    /// 
     /// `data` is the string to use, typically a URL or a part of it.
     /// `key` is the master password.
     /// `hash_algorithm` is a PasswordMaker Pro algorithm selection.
@@ -58,8 +115,8 @@ impl<'a, T : HasherList> PasswordMaker<'a, T>{
     /// `prefix` is the prefix to which the password gets appended. Counts towards `password_length`.
     /// `suffix` is the suffix appended to the password. Counts towards `password_length`.
     pub fn validate_input(
-        data : &'a str,
-        key: &'a str,
+        data : String,
+        key: String,
         hash_algorithm : HashAlgorithm,
         use_leet : UseLeetWhenGenerating,
         characters : &'a str,
@@ -107,14 +164,23 @@ impl<'a, T : HasherList> PasswordMaker<'a, T>{
 #[cfg_attr(test, derive(strum_macros::EnumIter))]
 #[derive(Debug,Clone, Copy)]
 pub enum LeetLevel {
+    /// First Leet level: ["4", "b", "c", "d", "3", "f", "g", "h", "i", "j", "k", "1", "m", "n", "0", "p", "9", "r", "s", "7", "u", "v", "w", "x", "y", "z"]
     One,
+    /// Second Leet level: ["4", "b", "c", "d", "3", "f", "g", "h", "1", "j", "k", "1", "m", "n", "0", "p", "9", "r", "5", "7", "u", "v", "w", "x", "y", "2"]
     Two,
+    /// Third Leet level: ["4", "8", "c", "d", "3", "f", "6", "h", "'", "j", "k", "1", "m", "n", "0", "p", "9", "r", "5", "7", "u", "v", "w", "x", "'/", "2"]
     Three,
+    /// Fourth Leet level: ["@", "8", "c", "d", "3", "f", "6", "h", "'", "j", "k", "1", "m", "n", "0", "p", "9", "r", "5", "7", "u", "v", "w", "x", "'/", "2"]
     Four,
+    /// Fifth Leet level: ["@", "|3", "c", "d", "3", "f", "6", "#", "!", "7", "|<", "1", "m", "n", "0", "|>", "9", "|2", "$", "7", "u", "\\/", "w", "x", "'/", "2"]
     Five,
+    /// Sixth Leet level: ["@", "|3", "c", "|)", "&", "|=", "6", "#", "!", ",|", "|<", "1", "m", "n", "0", "|>", "9", "|2", "$", "7", "u", "\\/", "w", "x", "'/", "2"]
     Six,
+    /// Seventh Leet level: ["@", "|3", "[", "|)", "&", "|=", "6", "#", "!", ",|", "|<", "1", "^^", "^/", "0", "|*", "9", "|2", "5", "7", "(_)", "\\/", "\\/\\/", "><", "'/", "2"]
     Seven,
+    /// Eighth Leet level: ["@", "8", "(", "|)", "&", "|=", "6", "|-|", "!", "_|", "|(", "1", "|\\/|", "|\\|", "()", "|>", "(,)", "|2", "$", "|", "|_|", "\\/", "\\^/", ")(", "'/", "\"/_"]
     Eight,
+    /// Ninth Leet level: ["@", "8", "(", "|)", "&", "|=", "6", "|-|", "!", "_|", "|{", "|_", "/\\/\\", "|\\|", "()", "|>", "(,)", "|2", "$", "|", "|_|", "\\/", "\\^/", ")(", "'/", "\"/_"]
     Nine,
 }
 
@@ -129,37 +195,102 @@ pub enum LeetLevel {
 /// The `HmacMd5Version06` is similarly ignoring the supplied characters and using hexadecimal numbers as output.
 #[derive(Debug,Clone, Copy)]
 pub enum HashAlgorithm {
+    /// Regular Md4 PasswordMaker Pro setting.
     Md4,
+    /// HMAC Md4 PasswordMaker Pro setting. Encodes input as UTF-16 and discards upper byte (just as PasswordMaker Pro does for HMAC).
     HmacMd4,
+    /// Regular Md5 PasswordMaker Pro setting.
     Md5,
+    /// Md5 as computed by PasswordMaker Pro version 0.6. Encodes input as UTF-16 and discards upper byte and outputs MD5 as hex number.
     Md5Version06,
+    /// HMAC Md5 PasswordMaker Pro setting. Encodes input as UTF-16 and discards upper byte (just as PasswordMaker Pro does for HMAC).
     HmacMd5,
+    /// HMAC Md5 as computed by PasswordMaker Pro version 0.6. Encodes input as UTF-16 and discards upper byte and outputs MD5 as hex number.
     HmacMd5Version06,
+    /// Regular Sha1 PasswordMaker Pro setting.
     Sha1,
+    /// HMAC Sha1 PasswordMaker Pro setting. Encodes input as UTF-16 and discards upper byte (just as PasswordMaker Pro does for HMAC).
     HmacSha1,
+    /// Regular Sha256 PasswordMaker Pro setting.
     Sha256,
+    /// HMAC Sha256 PasswordMaker Pro setting. Encodes input as UTF-16 and discards upper byte (just as PasswordMaker Pro does for HMAC).
     HmacSha256,
+    /// Regular Ripemd160 PasswordMaker Pro setting.
     Ripemd160,
+    /// HMAC Ripemd160 PasswordMaker Pro setting. Encodes input as UTF-16 and discards upper byte (just as PasswordMaker Pro does for HMAC).
     HmacRipemd160,
 }
 
-/// When the leet replacement shown in leet.rs is applied. It is always applied to each password part when the required password length
+/// When the Leet replacement shown in leet.rs is applied.
+/// If Leet is enabled, the input will be converted to lower case.
+/// It is always applied to each password part when the required password length
 /// is longer than the length obtained by computing a single hash. This is important if the input data or output charset contains certain
 /// characters where the lower case representation depends on context (e.g. 'Σ').
 #[derive(Debug,Clone, Copy)]
 pub enum UseLeetWhenGenerating {
+    /// Do not apply Leet on input or output.
     NotAtAll,
+    /// Apply Leet on the input before computing a password part.
     Before {
+        /// The Leet level to apply to the input.
         level : LeetLevel,
     },
+    /// Apply Leet on the generated password-part. Beware that this will force the password to lower-case characters.
     After {
+        /// The Leet level to apply to the generated password parts.
         level : LeetLevel,
     },
+    /// Apply Leet both to the input for the hasher and to the generated password parts. Beware that this will force the password to lower-case characters.
     BeforeAndAfter {
+        /// The Leet level to apply to both input and generated password parts.
         level : LeetLevel,
     },
 }
 
+/// Settings for the parsing of the user's input URL.
+/// This is used to generate the `data` parameter for [`PasswordMaker`].
+#[allow(clippy::struct_excessive_bools)]
+#[derive(Debug, Clone)]
+pub struct UrlParsing {
+    use_protocol : ProtocolUsageMode,
+    use_userinfo : bool,
+    use_subdomains : bool,
+    use_domain : bool,
+    use_port_path : bool,
+}
+
+impl UrlParsing {
+    /// Creates a new `UrlParsing` instance with the given settings.
+    pub fn new(
+        use_protocol : ProtocolUsageMode,
+        use_userinfo : bool,
+        use_subdomains : bool,
+        use_domain : bool,
+        use_port_path : bool,
+    ) -> Self{
+        UrlParsing{ use_protocol, use_userinfo, use_subdomains, use_domain, use_port_path, }
+    }
+
+    /// Parses an input string, applying the settings in `self`, and generates a string suitable for
+    /// the `data` parameter of [`PasswordMaker`]
+    pub fn parse(&self, input : &str) -> String{
+        self.make_used_text_from_url(input)
+    }
+}
+
+/// The "Use Protocol" checkbox in PasswordMaker Pro Javascript Edition has some weird behaviour, that's probably a bug.
+/// This enum lets you select how to handle the case that the user wants to use the Protocol, but the input string doesn't contain one.
+#[derive(Debug, Clone, Copy)]
+pub enum ProtocolUsageMode{
+    /// The protocol part of the URI is not used in the output.
+    Ignored,
+    /// The protocol part of the URI is used in the output, if it's non-empty in the input. Otherwise it isn't.
+    Used,
+    /// The protocol part of the URI is used in the output, if it's non-empty in the input. Otherwise the string "undefined" is used in the output.
+    /// This mirrors behaviour of the PasswordMaker Pro Javascript Edition.
+    UsedWithUndefinedIfEmpty,
+}
+
 /// Error returned if the supplied input did not meet expectations.
 /// The two "missing" variants are self-explanatory, but the `InsufficientCharset` might need some explanation:
 /// `InsufficientCharset` means that the output character set does not contain at least two grapheme clusters.
@@ -167,8 +298,11 @@ pub enum UseLeetWhenGenerating {
 /// any number of grapheme clusters lower than 2 forms a nonsensical input. There simply is no base-1 or base-0 number system.
 #[derive(Debug, Clone, Copy)]
 pub enum GenerationError {
+    /// Password generation failed, because the user did not supply a master password.
     MissingMasterPassword,
+    /// Password generation failed, because the user did not supply a text-to-use.
     MissingTextToUse,
+    /// Password generation failed, because the character set supplied by the user did not contain at least 2 grapheme clusters.
     InsufficientCharset
 }
 
diff --git a/src/passwordmaker/hmac.rs b/src/passwordmaker/hmac.rs
new file mode 100644
index 0000000..4c9d6aa
--- /dev/null
+++ b/src/passwordmaker/hmac.rs
@@ -0,0 +1,58 @@
+use crate::Hasher;
+
+pub(super) fn hmac<T, K, M>(key : K, data : M) -> T::Output
+    where T : Hasher,
+    T::Output : AsRef<[u8]>,
+    K : Iterator<Item=u8> + Clone,
+    M : Iterator<Item=u8>,
+{
+    let key_len = key.clone().count();
+    let key =  if key_len > 64 {
+        KeyOrHash::from_hash(T::hash(&key.collect::<Vec<_>>()))
+    } else {
+        KeyOrHash::from_key(key)
+    };
+    let key = key.chain(std::iter::repeat(0)); //if key[i] does not exist, use 0 instead.
+
+    let mut inner_pad = [0u8;64];
+    let mut outer_pad = [0u8;64];
+
+    let pads = inner_pad.iter_mut().zip(outer_pad.iter_mut());
+    for ((i,o),k) in pads.zip(key) {
+        *i = k ^ 0x36;
+        *o = k ^ 0x5C;
+    }
+
+    let hash = T::hash(&inner_pad.iter().copied().chain(data).collect::<Vec<_>>());
+    T::hash(&outer_pad.iter().chain(hash.as_ref().iter()).copied().collect::<Vec<_>>())
+}
+
+enum KeyOrHash<K: Iterator<Item=u8>, H: AsRef<[u8]>> {
+    Key(K),
+    Hash{
+        hash : H,
+        idx : usize
+    }
+}
+
+impl<K: Iterator<Item=u8>, H: AsRef<[u8]>> KeyOrHash<K, H>{
+    fn from_key(key : K) -> Self {
+        Self::Key(key)
+    }
+    fn from_hash(hash : H) -> Self {
+        Self::Hash { hash, idx: 0 }
+    }
+}
+
+impl<K: Iterator<Item=u8>, H: AsRef<[u8]>> Iterator for KeyOrHash<K, H>{
+    type Item = u8;
+    fn next(&mut self) -> Option<Self::Item> {
+        match self {
+            KeyOrHash::Key(k) => k.next(),
+            KeyOrHash::Hash { hash: owned, idx } => {
+                *idx += 1;
+                owned.as_ref().get(*idx-1).copied()
+            },
+        }
+    }
+}
+\ No newline at end of file
diff --git a/src/passwordmaker/mod.rs b/src/passwordmaker/mod.rs
index 4874758..eb39c9e 100644
--- a/src/passwordmaker/mod.rs
+++ b/src/passwordmaker/mod.rs
@@ -10,6 +10,7 @@ use super::Hasher;
 mod remainders;
 mod remainders_impl;
 mod grapheme;
+mod hmac;
 pub(crate) mod leet;
 
 impl<'y, H : super::HasherList> super::PasswordMaker<'y, H>{
@@ -18,8 +19,8 @@ impl<'y, H : super::HasherList> super::PasswordMaker<'y, H>{
     }
 
     pub(super) fn generate_password_verified_input(self) -> String {
-        let modified_data = self.data.to_owned() + self.username + self.modifier;
-        let key = self.key.to_owned();
+        let modified_data = self.data + self.username + self.modifier;
+        let key = self.key;
         let get_modified_key = move |i : usize| { if i == 0 {key.clone()} else {key.clone() + "\n" + &i.to_string()}};
     
         //In Passwordmaker Pro, leet is applied on a per-password-part basis. This means that if a password part ends in an upper-case Sigma,
@@ -111,7 +112,7 @@ impl<'y, H : super::HasherList> super::PasswordMaker<'y, H>{
         let data = leetified_data.as_deref().unwrap_or(data);
         let key = yeet_upper_bytes(&key);
         let data = yeet_upper_bytes(data);
-        let hash = hmac::<H::MD5,_,_>(key, data);
+        let hash = hmac::hmac::<H::MD5,_,_>(key, data);
         let hash_as_integer = u128::from_be_bytes(hash);
         let grapheme_indices : Vec<_> = hash_as_integer.calc_remainders(characters.len() as u128).map(|llll| llll as usize).collect();
         let grapheme_indices = yoink_additional_graphemes_for_06_if_needed(grapheme_indices);
@@ -225,7 +226,7 @@ fn modern_hmac_to_grapheme_indices<T, F, C, Z, D, U>(key : &str, data: &str, to_
 {
     let key = yeet_upper_bytes(key);
     let data = yeet_upper_bytes(data);
-    to_dividend(hmac::<T,_,_>(key, data)).calc_remainders(divisor).map(to_usize).collect()
+    to_dividend(hmac::hmac::<T,_,_>(key, data)).calc_remainders(divisor).map(to_usize).collect()
 }
 
 fn modern_message_to_grapheme_indices<T, F, C, Z, D, U>(data: &str, to_dividend : F, divisor : D, to_usize : U) -> Vec<usize>
@@ -353,61 +354,4 @@ fn yeet_upper_bytes(input : &str) -> impl Iterator<Item=u8> + Clone + '_ {
 fn yoink_additional_graphemes_for_06_if_needed(mut input : Vec<usize>) -> Vec<usize> {
     input.resize(32, 0);
     input
-}
-
-fn hmac<T, K, M>(key : K, data : M) -> T::Output
-    where T : Hasher,
-    T::Output : AsRef<[u8]>,
-    K : Iterator<Item=u8> + Clone,
-    M : Iterator<Item=u8>,
-{
-    let key_len = key.clone().count();
-    let key =  if key_len > 64 {
-        KeyOrHash::from_hash(T::hash(&key.collect::<Vec<_>>()))
-    } else {
-        KeyOrHash::from_key(key)
-    };
-    let key = key.chain(std::iter::repeat(0)); //if key[i] does not exist, use 0 instead.
-
-    let mut inner_pad = [0u8;64];
-    let mut outer_pad = [0u8;64];
-
-    let pads = inner_pad.iter_mut().zip(outer_pad.iter_mut());
-    for ((i,o),k) in pads.zip(key) {
-        *i = k ^ 0x36;
-        *o = k ^ 0x5C;
-    }
-
-    let hash = T::hash(&inner_pad.iter().copied().chain(data).collect::<Vec<_>>());
-    T::hash(&outer_pad.iter().chain(hash.as_ref().iter()).copied().collect::<Vec<_>>())
-}
-
-enum KeyOrHash<K: Iterator<Item=u8>, H: AsRef<[u8]>> {
-    Key(K),
-    Hash{
-        hash : H,
-        idx : usize
-    }
-}
-
-impl<K: Iterator<Item=u8>, H: AsRef<[u8]>> KeyOrHash<K, H>{
-    fn from_key(key : K) -> Self {
-        Self::Key(key)
-    }
-    fn from_hash(hash : H) -> Self {
-        Self::Hash { hash, idx: 0 }
-    }
-}
-
-impl<K: Iterator<Item=u8>, H: AsRef<[u8]>> Iterator for KeyOrHash<K, H>{
-    type Item = u8;
-    fn next(&mut self) -> Option<Self::Item> {
-        match self {
-            KeyOrHash::Key(k) => k.next(),
-            KeyOrHash::Hash { hash: owned, idx } => {
-                *idx += 1;
-                owned.as_ref().get(*idx-1).copied()
-            },
-        }
-    }
 }
 \ No newline at end of file
diff --git a/src/url_parsing/mod.rs b/src/url_parsing/mod.rs
new file mode 100644
index 0000000..e0a95ad
--- /dev/null
+++ b/src/url_parsing/mod.rs
@@ -0,0 +1,508 @@
+use crate::UrlParsing;
+use std::ops::Deref;
+use std::ops::Add;
+
+impl UrlParsing {
+    /// Computes a `used_text` from an input URL according to the passed in `UrlParsing` object.
+    /// Aims to be kinda compatible to Passwordmaker Pro.
+    pub(super) fn make_used_text_from_url(&self, input : &str, ) -> String {
+        parse_url(input).filter_by_settings(self).recombine()
+    }
+
+    fn is_protocol_used(&self) -> bool{
+        match self.use_protocol{
+            crate::ProtocolUsageMode::Ignored => false,
+            crate::ProtocolUsageMode::Used
+             | crate::ProtocolUsageMode::UsedWithUndefinedIfEmpty
+             => true,
+        }
+    }
+    fn use_protocol_undefined_fallback(&self) -> bool{
+        match self.use_protocol{
+            crate::ProtocolUsageMode::Ignored
+             | crate::ProtocolUsageMode::Used
+             => false,
+            crate::ProtocolUsageMode::UsedWithUndefinedIfEmpty => true,
+        }
+    }
+}
+
+#[cfg_attr(test, derive(PartialEq, Debug, Clone))]
+struct UrlParts<'s> {
+    protocol : &'s str,
+    userinfo : &'s str, //Treating this field separately is an addition to the functionality offered by PasswordMaker Pro
+    subdomain : &'s str, //this is not part of the official URI spec. But PasswordMaker Pro uses it.
+    domain: &'s str,
+    port: &'s str, //this would not need to be separated from path_query_fragment, but it's easier to parse if it's separate.
+    path_query_fragment: &'s str //we don't need to separate those. Passwordmaker doesn't either.
+}
+
+impl<'s> UrlParts<'s> {
+    fn filter_by_settings(self, settings : &UrlParsing) -> UsedUrlParts<'s>{
+        let has_protocol = settings.is_protocol_used() && !self.protocol.is_empty();
+        UsedUrlParts{
+            protocol: //PasswordMaker Pro compatibility: Protocol is handled _weird_...
+                if has_protocol { self.protocol }
+                else if settings.use_protocol_undefined_fallback() { "undefined" }
+                else { <&str>::default() },
+            protocol_separator: if has_protocol { "://" } else { <&str>::default() }, //this is again some PasswordMaker Pro weirdness...
+            userinfo: if settings.use_userinfo { self.userinfo } else { <&str>::default() },
+            subdomain: if settings.use_subdomains { self.subdomain } else { <&str>::default() },
+            domain: if settings.use_domain { self.domain } else { <&str>::default() },
+            port: if settings.use_port_path { self.port } else { <&str>::default() },
+            path_query_fragment: if settings.use_port_path { self.path_query_fragment } else { <&str>::default() },
+        }
+    }
+}
+
+#[cfg_attr(test, derive(PartialEq, Debug))]
+struct UsedUrlParts<'s> {
+    protocol : &'s str,
+    protocol_separator : &'s str,
+    userinfo : &'s str, //Treating this field separately is an addition to the functionality offered by PasswordMaker Pro
+    subdomain : &'s str, //this is not part of the official URI spec. But PasswordMaker Pro uses it.
+    domain: &'s str,
+    port: &'s str, //this would not need to be separated from path_query_fragment, but it's easier to parse if it's separate.
+    path_query_fragment: &'s str //we don't need to separate those. Passwordmaker doesn't either.
+}
+
+impl<'s> UsedUrlParts<'s> {
+    #[allow(clippy::doc_markdown)]
+    /// Tries to assemble a string in a way that's at least somehow compatible to PasswordMaker Pro.
+    /// This prioritizes ease of use ("what the user expects") over correct URI parsing.
+    fn recombine(self) -> String {
+        //matching would need 64 arms... Too much work, soooo, a couple of ifs and less sanity instead.
+        let has_userinfo = !self.userinfo.is_empty();
+        let has_subdomain = !self.subdomain.is_empty();
+        let has_domain = !self.domain.is_empty();
+        let has_port = !self.port.is_empty();
+        let has_path_query_fragment = !self.path_query_fragment.is_empty();
+        
+        //by doing all logic on &str, we save allocations to the very last moment. Also, the syntax is more readable.
+        let parts = [
+            self.protocol,
+            self.protocol_separator,
+            self.userinfo,
+            if has_userinfo && (has_domain || has_subdomain || has_port|| has_path_query_fragment) { "@" } else { <&str>::default() },
+            self.subdomain,
+            if has_subdomain && has_domain { "." } else { <&str>::default() },
+            self.domain,
+            if has_port && (has_userinfo || has_domain || has_subdomain) { ":" } else { <&str>::default() },
+            self.port,
+            self.path_query_fragment,
+        ];
+
+        let needed_size = parts.iter().map(Deref::deref).map(<str>::len).sum();
+        parts.iter().map(Deref::deref).fold(String::with_capacity(needed_size), String::add)
+    }
+}
+
+#[allow(clippy::doc_markdown)]
+/// Parses the input URI in a way that resembles the behaviour of PasswordMaker Pro. This is intentionally not following the URI standard.
+/// It prioritizes ease-of-use over strictly following the URI standard.
+/// The idea here is that users tend to input strings of the form "www.somedomain.com", which is not a valid URI (authority is not optional).
+/// Input of this form should still work though, in order not to confuse users.
+fn parse_url(input : &str) -> UrlParts{
+    let maybe_protocol = input.split_once(':');
+    let has_protocol = maybe_protocol.is_some();
+    let (protocol, rest) = maybe_protocol.unwrap_or((<&str>::default(), input));
+    let removed_authority_marker = rest.strip_prefix("//");
+    let has_authority = removed_authority_marker.is_some();
+    let rest = removed_authority_marker.unwrap_or(rest);
+
+    //Authority stops at first / character. Or, if none encountered, at end of input. Slash is part of path.
+    //If there is a protocol, but no authority, we must treat everything after the initial ':' as path though.
+    let first_character_of_path = if has_protocol && !has_authority {Some(0)} else {rest.find('/')};
+    let (authority, path_query_fragment) = first_character_of_path.map_or((rest, <&str>::default()),|mid| rest.split_at(mid));
+    //must split authority at '@' characters. Otherwise ':' is ambiguous.
+    let (userinfo, host_and_port) = authority.split_once('@').unwrap_or((<&str>::default(), authority));
+    let (address, port) = host_and_port.split_once(':').unwrap_or((host_and_port, <&str>::default()));
+    let separator_between_subdom_and_domain = address.rmatch_indices('.').nth(1);
+    let (subdomain, domain_with_leading_dot) = separator_between_subdom_and_domain.map_or((<&str>::default(), address), |(i, _)| address.split_at(i));
+    let domain = domain_with_leading_dot.strip_prefix('.').unwrap_or(domain_with_leading_dot);
+    UrlParts{protocol, userinfo, subdomain, domain, port, path_query_fragment}
+}
+
+#[cfg(test)]
+mod url_parsing_tests {
+    use crate::ProtocolUsageMode;
+
+    use super::*;
+
+    /// Just tries to split some example urls and checks if the result is as expected. This tests against PasswordMaker Pro behaviour, not proper URI format.
+    #[test]
+    fn uri_splitting_test_full_uri(){
+        let input = "http://anon:12345@some.subdomain.of.some.domain.com:8080/some/path/with?query&and#fragment";
+        let expected = UrlParts{
+            protocol: "http",
+            userinfo: "anon:12345",
+            subdomain: "some.subdomain.of.some",
+            domain: "domain.com",
+            port: "8080",
+            path_query_fragment: "/some/path/with?query&and#fragment",
+        };
+        let result = parse_url(input);
+        assert_eq!(result, expected);
+    }
+    #[test]
+    fn uri_splitting_test_no_userinfo(){
+        let input = "http://some.subdomain.of.some.domain.com:8080/some/path/with?query&and#fragment";
+        let expected = UrlParts{
+            protocol: "http",
+            userinfo: <&str>::default(),
+            subdomain: "some.subdomain.of.some",
+            domain: "domain.com",
+            port: "8080",
+            path_query_fragment: "/some/path/with?query&and#fragment",
+        };
+        let result = parse_url(input);
+        assert_eq!(result, expected);
+    }
+    #[test]
+    fn uri_splitting_test_no_port(){  
+        let input = "http://anon:12345@some.subdomain.of.some.domain.com/some/path/with?query&and#fragment";
+        let expected = UrlParts{
+            protocol: "http",
+            userinfo: "anon:12345",
+            subdomain: "some.subdomain.of.some",
+            domain: "domain.com",
+            port: <&str>::default(),
+            path_query_fragment: "/some/path/with?query&and#fragment",
+        };
+        let result = parse_url(input);
+        assert_eq!(result, expected);
+    }
+    #[test]
+    fn uri_splitting_test_no_domain(){  
+        let input = "http://anon:12345@:8080/some/path/with?query&and#fragment";
+        let expected = UrlParts{
+            protocol: "http",
+            userinfo: "anon:12345",
+            subdomain: <&str>::default(),
+            domain: <&str>::default(),
+            port: "8080",
+            path_query_fragment: "/some/path/with?query&and#fragment",
+        };
+        let result = parse_url(input);
+        assert_eq!(result, expected);
+    }
+    #[test]
+    fn uri_splitting_test_no_domain_no_port(){  
+        let input = "http://anon:12345@/some/path/with?query&and#fragment";
+        let expected = UrlParts{
+            protocol: "http",
+            userinfo: "anon:12345",
+            subdomain: <&str>::default(),
+            domain: <&str>::default(),
+            port: <&str>::default(),
+            path_query_fragment: "/some/path/with?query&and#fragment",
+        };
+        let result = parse_url(input);
+        assert_eq!(result, expected);
+    }
+    #[test]
+    fn uri_splitting_test_empty_path(){
+        let input = "http://anon:12345@some.subdomain.of.some.domain.com:8080";
+        let expected = UrlParts{
+            protocol: "http",
+            userinfo: "anon:12345",
+            subdomain: "some.subdomain.of.some",
+            domain: "domain.com",
+            port: "8080",
+            path_query_fragment: <&str>::default(),
+        };
+        let result = parse_url(input);
+        assert_eq!(result, expected);
+    }
+    #[test]
+    fn uri_splitting_test_only_protocol_and_path(){
+        let input = "http:some/path/";
+        let expected = UrlParts{
+            protocol: "http",
+            userinfo: <&str>::default(),
+            subdomain: <&str>::default(),
+            domain: <&str>::default(),
+            port: <&str>::default(),
+            path_query_fragment: "some/path/",
+        };
+        let result = parse_url(input);
+        assert_eq!(result, expected);
+    }
+
+    /// This triggers me. It should not work (scheme isn't optional), but users would miss it.
+    /// Password and Port are not included too, because those would be (correctly) identified as schemes.
+    #[test]
+    fn uri_splitting_missing_protocol(){
+        let input = "anon@some.subdomain.of.some.domain.com/some/path/with?query&and#fragment";
+        let expected = UrlParts{
+            protocol: <&str>::default(),
+            userinfo: "anon",
+            subdomain: "some.subdomain.of.some",
+            domain: "domain.com",
+            port: <&str>::default(),
+            path_query_fragment: "/some/path/with?query&and#fragment",
+        };
+        let result = parse_url(input);
+        assert_eq!(result, expected);
+    }
+    #[test]
+    fn uri_splitting_just_domain_and_path(){
+        let input = "some.subdomain.of.some.domain.com/some/path/with?query&and#fragment";
+        let expected = UrlParts{
+            protocol: <&str>::default(),
+            userinfo: <&str>::default(),
+            subdomain: "some.subdomain.of.some",
+            domain: "domain.com",
+            port: <&str>::default(),
+            path_query_fragment: "/some/path/with?query&and#fragment",
+        };
+        let result = parse_url(input);
+        assert_eq!(result, expected);
+    }
+    /// Input that consists of nothing but a host name with several subdomain labels.
+    #[test]
+    fn uri_splitting_just_domain_and_subdomain(){
+        let wanted = UrlParts{
+            protocol: "",
+            userinfo: "",
+            subdomain: "some.subdomain.of.some",
+            domain: "domain.com",
+            port: "",
+            path_query_fragment: "",
+        };
+        assert_eq!(parse_url("some.subdomain.of.some.domain.com"), wanted);
+    }
+    /// Input that is a bare two-label domain: every other part is expected to stay empty.
+    #[test]
+    fn uri_splitting_just_domain(){
+        let url = "domain.com";
+        let wanted = UrlParts{
+            protocol: "",
+            userinfo: "",
+            subdomain: "",
+            domain: "domain.com",
+            port: "",
+            path_query_fragment: "",
+        };
+        let parsed = parse_url(url);
+        assert_eq!(parsed, wanted);
+    }
+    /// Input that is just a scheme followed by a colon: only the protocol part is expected to be set.
+    #[test]
+    fn uri_splitting_only_protocol(){
+        let parsed = parse_url("ftp:");
+        let wanted = UrlParts{
+            protocol: "ftp",
+            userinfo: "",
+            subdomain: "",
+            domain: "",
+            port: "",
+            path_query_fragment: "",
+        };
+        assert_eq!(parsed, wanted);
+    }
+
+    // The tests above are incomplete. There are 64 combinations, after all, and on top of that there could be errors.
+    // So, let's just pretend it's fine, and if bugs show up, add the specific buggy input as a test.
+    //-----------------------------------------------------------------------------
+    // Reassembly tests
+    // Again our valid input range is 64 values... And again we just test some samples that are known to be weird.
+    // For everything else, let's wait for bug reports.
+
+    /// Settings application, however, is small enough to be tested for every combination:
+    /// the six on/off choices exercised here (five settings, plus whether the input has a
+    /// protocol at all) give 64 cases. This test uses `ProtocolUsageMode::Used`.
+    #[test]
+    fn apply_settings_to_url_parts_no_undefined_protocol(){
+        for combination in 0..64u32 {
+            // One bit of the counter per choice; a cleared bit means "on".
+            let settings = UrlParsing {
+                use_protocol: if combination & 0b00_0001 == 0 { ProtocolUsageMode::Used } else { ProtocolUsageMode::Ignored },
+                use_userinfo: combination & 0b00_0010 == 0,
+                use_subdomains: combination & 0b00_0100 == 0,
+                use_domain: combination & 0b00_1000 == 0,
+                use_port_path: combination & 0b01_0000 == 0,
+            };
+            let inputs = UrlParts {
+                protocol: if combination & 0b10_0000 == 0 {"proto"} else {""},
+                userinfo: "plasmic",
+                subdomain: "pirate",
+                domain: "hordes",
+                port: "420",
+                path_query_fragment: "under/blackened#banners",
+            };
+
+            let output = inputs.clone().filter_by_settings(&settings);
+
+            // Every part that is switched off must come out empty, every other part unchanged.
+            let protocol_used = settings.is_protocol_used();
+            let expected_protocol = if protocol_used { inputs.protocol } else { "" };
+            // The separator is only expected when a non-empty protocol is actually used.
+            let expected_separator = if protocol_used && !inputs.protocol.is_empty() { "://" } else { "" };
+            let expected_userinfo = if settings.use_userinfo { inputs.userinfo } else { "" };
+            let expected_subdomain = if settings.use_subdomains { inputs.subdomain } else { "" };
+            let expected_domain = if settings.use_domain { inputs.domain } else { "" };
+            let expected_port = if settings.use_port_path { inputs.port } else { "" };
+            let expected_path = if settings.use_port_path { inputs.path_query_fragment } else { "" };
+
+            assert_eq!(output.protocol, expected_protocol);
+            assert_eq!(output.protocol_separator, expected_separator);
+            assert_eq!(output.userinfo, expected_userinfo);
+            assert_eq!(output.subdomain, expected_subdomain);
+            assert_eq!(output.domain, expected_domain);
+            assert_eq!(output.port, expected_port);
+            assert_eq!(output.path_query_fragment, expected_path);
+        }
+    }
+    /// Same exhaustive walk over all 64 combinations, but with
+    /// `ProtocolUsageMode::UsedWithUndefinedIfEmpty`: a used, empty protocol is expected to
+    /// come out as "undefined", still without a protocol separator.
+    #[test]
+    fn apply_settings_to_url_parts_undefined_protocol(){
+        for combination in 0..64u32 {
+            // One bit of the counter per choice; a cleared bit means "on".
+            let bit_cleared = |mask: u32| combination & mask == 0;
+            let protocol_mode = if bit_cleared(1) {
+                ProtocolUsageMode::UsedWithUndefinedIfEmpty
+            } else {
+                ProtocolUsageMode::Ignored
+            };
+            let settings = UrlParsing {
+                use_protocol: protocol_mode,
+                use_userinfo: bit_cleared(2),
+                use_subdomains: bit_cleared(4),
+                use_domain: bit_cleared(8),
+                use_port_path: bit_cleared(16),
+            };
+            let inputs = UrlParts {
+                protocol: if bit_cleared(32) {"proto"} else {""},
+                userinfo: "plasmic",
+                subdomain: "pirate",
+                domain: "hordes",
+                port: "420",
+                path_query_fragment: "under/blackened#banners",
+            };
+
+            let output = inputs.clone().filter_by_settings(&settings);
+
+            let protocol_used = settings.is_protocol_used();
+            let protocol_given = !inputs.protocol.is_empty();
+            let expected_protocol = match (protocol_used, protocol_given) {
+                (false, _) => "",
+                (true, true) => inputs.protocol,
+                (true, false) => "undefined",
+            };
+            assert_eq!(output.protocol, expected_protocol);
+
+            // The separator is only expected for a protocol that came from the input.
+            let expected_separator = if protocol_used && protocol_given { "://" } else { "" };
+            assert_eq!(output.protocol_separator, expected_separator);
+
+            // Every other part is either passed through unchanged or blanked out.
+            let expected_userinfo = if settings.use_userinfo { inputs.userinfo } else { "" };
+            assert_eq!(output.userinfo, expected_userinfo);
+            let expected_subdomain = if settings.use_subdomains { inputs.subdomain } else { "" };
+            assert_eq!(output.subdomain, expected_subdomain);
+            let expected_domain = if settings.use_domain { inputs.domain } else { "" };
+            assert_eq!(output.domain, expected_domain);
+            let expected_port = if settings.use_port_path { inputs.port } else { "" };
+            assert_eq!(output.port, expected_port);
+            let expected_path = if settings.use_port_path { inputs.path_query_fragment } else { "" };
+            assert_eq!(output.path_query_fragment, expected_path);
+        }
+    }
+
+    /// Recombination with every part present.
+    #[test]
+    fn recombine_full_url_test() {
+        let parts = UsedUrlParts{
+            protocol: "xmpp",
+            protocol_separator: "://",
+            userinfo: "horst:12345",
+            subdomain: "www",
+            domain: "example.com",
+            port: "8080",
+            path_query_fragment: "/some/path",
+        };
+        assert_eq!(parts.recombine(), "xmpp://horst:12345@www.example.com:8080/some/path");
+    }
+    /// Recombination with userinfo but an empty subdomain: no "." is expected in front of the domain.
+    #[test]
+    fn recombine_user_but_no_subdomain() {
+        let parts = UsedUrlParts{
+            protocol: "xmpp",
+            protocol_separator: "://",
+            userinfo: "horst:12345",
+            subdomain: "",
+            domain: "example.com",
+            port: "8080",
+            path_query_fragment: "/some/path",
+        };
+        let recombined = parts.recombine();
+        assert_eq!(recombined, "xmpp://horst:12345@example.com:8080/some/path");
+    }
+    /// Recombination with a subdomain but empty userinfo: no "@" is expected in the result.
+    #[test]
+    fn recombine_no_user_but_subdomain() {
+        let parts = UsedUrlParts{
+            protocol: "xmpp",
+            protocol_separator: "://",
+            userinfo: "",
+            subdomain: "w3",
+            domain: "example.com",
+            port: "8080",
+            path_query_fragment: "/some/path",
+        };
+        assert_eq!(parts.recombine(), "xmpp://w3.example.com:8080/some/path");
+    }
+    /// Recombination with both userinfo and subdomain empty.
+    #[test]
+    fn recombine_no_user_no_subdomain() {
+        let parts = UsedUrlParts{
+            protocol: "xmpp",
+            protocol_separator: "://",
+            userinfo: "",
+            subdomain: "",
+            domain: "example.com",
+            port: "8080",
+            path_query_fragment: "/some/path",
+        };
+        let recombined = parts.recombine();
+        assert_eq!(recombined, "xmpp://example.com:8080/some/path");
+    }
+    /// Recombination with userinfo, subdomain and port all empty: no ":" is expected after the domain.
+    #[test]
+    fn recombine_no_user_no_subdomain_no_port() {
+        let parts = UsedUrlParts{
+            protocol: "xmpp",
+            protocol_separator: "://",
+            userinfo: "",
+            subdomain: "",
+            domain: "example.com",
+            port: "",
+            path_query_fragment: "/some/path",
+        };
+        assert_eq!(parts.recombine(), "xmpp://example.com/some/path");
+    }
+    #[test]
+    fn recombine_undefined_protocol() {
+        let input = UsedUrlParts{
+            protocol: "undefined",
+            protocol_separator: <&str>::default(),
+            userinfo: "horst:12345",
+            subdomain: "www",
+            domain: "example.com",
+            port: "8080",
+            path_query_fragment: "/some/path",
+        };
+        let result = input.recombine();
+        assert_eq!(result, "undefinedhorst:12345@www.example.com:8080/some/path");
+    }
+    #[test]
+    fn recombine_undefined_protocol_no_user_no_subdomain() {
+        let input = UsedUrlParts{
+            protocol: "undefined",
+            protocol_separator: <&str>::default(),
+            userinfo: <&str>::default(),
+            subdomain: <&str>::default(),
+            domain: "example.com",
+            port: <&str>::default(),
+            path_query_fragment: "/some/path",
+        };
+        let result = input.recombine();
+        assert_eq!(result, "undefinedexample.com/some/path");
+    }
+    /// Recombination without any protocol: the result is expected to start with the subdomain.
+    #[test]
+    fn recombine_no_protocol() {
+        let parts = UsedUrlParts{
+            protocol: "",
+            protocol_separator: "",
+            userinfo: "",
+            subdomain: "www",
+            domain: "example.com",
+            port: "",
+            path_query_fragment: "/some/path",
+        };
+        let recombined = parts.recombine();
+        assert_eq!(recombined, "www.example.com/some/path");
+    }
+    /// Recombination with an empty path/query/fragment: the result is expected to end with the port.
+    #[test]
+    fn recombine_empty_path() {
+        let parts = UsedUrlParts{
+            protocol: "xmpp",
+            protocol_separator: "://",
+            userinfo: "horst:12345",
+            subdomain: "www",
+            domain: "example.com",
+            port: "8080",
+            path_query_fragment: "",
+        };
+        assert_eq!(parts.recombine(), "xmpp://horst:12345@www.example.com:8080");
+    }
+}
+\ No newline at end of file
author	Andreas Grois <andi@grois.info>	2022-10-09 14:00:38 +0200
committer	Andreas Grois <andi@grois.info>	2022-10-09 14:00:38 +0200
commit	d6d345207530ec3232d937aeee3b0c9255b33129 (patch)
tree	0645a54d0f695092ed6c0003777d2e212a99e5e5
parent	5e51b706d54a26470f33d1342f4666d5aab921fc (diff)