diff options
author | Andreas Grois <andi@grois.info> | 2022-10-09 14:00:38 +0200 |
---|---|---|
committer | Andreas Grois <andi@grois.info> | 2022-10-09 14:00:38 +0200 |
commit | d6d345207530ec3232d937aeee3b0c9255b33129 (patch) | |
tree | 0645a54d0f695092ed6c0003777d2e212a99e5e5 /src | |
parent | 5e51b706d54a26470f33d1342f4666d5aab921fc (diff) |
Also add url_parsing to this crate.
Diffstat (limited to 'src')
-rw-r--r-- | src/lib.rs | 146 | ||||
-rw-r--r-- | src/passwordmaker/hmac.rs | 58 | ||||
-rw-r--r-- | src/passwordmaker/mod.rs | 66 | ||||
-rw-r--r-- | src/url_parsing/mod.rs | 508 |
4 files changed, 711 insertions, 67 deletions
@@ -1,4 +1,17 @@ +#![warn(missing_docs)] +//! Library that should allow quick implementation of tools that are compatible with PasswordMaker Pro. +//! +//! It forms the core of an upcoming PasswordMaker Pro compatible Sailfish OS App (as of yet unnamed). +//! This library intentionally does not depend on any specific implementation of the cryptographic hashes +//! it relies on. To see an example of how to integrate with the [Rust Crypto Hashes](https://github.com/RustCrypto/hashes), +//! see the integration tests. +//! +//! There are two main functions in this library: [`generate_password`][PasswordMaker::generate_password] and +//! [`parse()`][UrlParsing::parse]. + + mod passwordmaker; +mod url_parsing; use passwordmaker::{PasswordPartParameters, PasswordAssemblyParameters}; use passwordmaker::leet::LeetReplacementTable; use std::error::Error; @@ -7,8 +20,13 @@ use std::marker::PhantomData; /// Trait you need to implement for the various hash functions you need to provide. /// Currently only a single function, that computes the hash of a string slice, is needed. This may change in a later version. +/// +/// Beware: There is currently no way to put constraints on associated constants in Rust, so Block Size is not exposed. +/// It's anyhow the same (currently hardcoded) value for all supported algorithms. pub trait Hasher { + /// The output type of the respective hash function. Typically some form of byte array. type Output; + /// Function that takes a byte array as input, and generates the cryptographic hash of it as output. fn hash(input : &[u8]) -> Self::Output; } @@ -25,18 +43,23 @@ pub trait Ripemd160 : Hasher<Output = [u8;20]> {} /// List of hash functions to use. Trait may change in later versions to include constructors for actual hasher objects. pub trait HasherList { + /// The type that offers MD4 hashing. See the [`Md4`] trait. type MD4 : Md4; + /// The type that offers MD5 hashing. See the [`Md5`] trait. 
type MD5 : Md5; + /// The type that offers SHA1 hashing. See the [`Sha1`] trait. type SHA1 : Sha1; + /// The type that offers SHA256 hashing. See the [`Sha256`] trait. type SHA256 : Sha256; + /// The type that offers Ripemd160 hashing. See the [`Ripemd160`] trait. type RIPEMD160 : Ripemd160; } /// A single-use instance of PasswordMaker, created after all inputs are verified to be usable. /// Only has one method, which is to generate the password. pub struct PasswordMaker<'a, T : HasherList>{ - data : &'a str, //aka url aka used text - key : &'a str, //aka master password + data : String, //aka url aka used text + key : String, //aka master password username : &'a str, modifier : &'a str, password_part_parameters : PasswordPartParameters<'a>, //contains pre_leet, as this is different for different algorithms @@ -46,7 +69,41 @@ pub struct PasswordMaker<'a, T : HasherList>{ } impl<'a, T : HasherList> PasswordMaker<'a, T>{ - /// Validates user input and returns a PasswordMaker if the input is valid. + /// Generates a password in a way that's (hopefully) compatible to PasswordMaker Pro. Returns an error for unusable input. + /// + /// `data` is the string to use, typically a URL or a part of it. + /// `key` is the master password. + /// `hash_algorithm` is a PasswordMaker Pro algorithm selection. + /// `use_leet` details when to use leet, if at all. + /// `characters` is the list of output password characters. Actually this is not true. It's the list of grapheme clusters. + /// `username` is the "username" field of PasswordMaker Pro. + /// `modifier` is the "modifier" field of PasswordMaker Pro. + /// `password_length` is the desired password length to generate. + /// `prefix` is the prefix to which the password gets appended. Counts towards `password_length`. + /// `suffix` is the suffix appended to the password. Counts towards `password_length`. 
+ pub fn generate_password( + data : String, + key: String, + hash_algorithm : HashAlgorithm, + use_leet : UseLeetWhenGenerating, + characters : &'a str, + username : &'a str, + modifier: &'a str, + password_length : usize, + prefix : &'a str, + suffix : &'a str, + ) -> Result<String, GenerationError>{ + Ok( + Self::validate_input(data, key, hash_algorithm, use_leet, characters, username, modifier, password_length, prefix, suffix)? + .generate() + ) + } + + + /// Validates user input and returns a `PasswordMaker` object if the input is valid. + /// Use this if you want to split input validation from actual password computation. + /// Otherwise, consider using the `generate_password` function for shorter code. + /// /// `data` is the string to use, typically a URL or a part of it. /// `key` is the master password. /// `hash_algorithm` is a PasswordMaker Pro algorithm selection. @@ -58,8 +115,8 @@ impl<'a, T : HasherList> PasswordMaker<'a, T>{ /// `prefix` is the prefix to which the password gets appended. Counts towards `password_length`. /// `suffix` is the suffix appended to the password. Counts towards `password_length`. 
pub fn validate_input( - data : &'a str, - key: &'a str, + data : String, + key: String, hash_algorithm : HashAlgorithm, use_leet : UseLeetWhenGenerating, characters : &'a str, @@ -107,14 +164,23 @@ impl<'a, T : HasherList> PasswordMaker<'a, T>{ #[cfg_attr(test, derive(strum_macros::EnumIter))] #[derive(Debug,Clone, Copy)] pub enum LeetLevel { + /// First Leet level: ["4", "b", "c", "d", "3", "f", "g", "h", "i", "j", "k", "1", "m", "n", "0", "p", "9", "r", "s", "7", "u", "v", "w", "x", "y", "z"] One, + /// Second Leet level: ["4", "b", "c", "d", "3", "f", "g", "h", "1", "j", "k", "1", "m", "n", "0", "p", "9", "r", "5", "7", "u", "v", "w", "x", "y", "2"] Two, + /// Third Leet level: ["4", "8", "c", "d", "3", "f", "6", "h", "'", "j", "k", "1", "m", "n", "0", "p", "9", "r", "5", "7", "u", "v", "w", "x", "'/", "2"] Three, + /// Fourth Leet level: ["@", "8", "c", "d", "3", "f", "6", "h", "'", "j", "k", "1", "m", "n", "0", "p", "9", "r", "5", "7", "u", "v", "w", "x", "'/", "2"] Four, + /// Fifth Leet level: ["@", "|3", "c", "d", "3", "f", "6", "#", "!", "7", "|<", "1", "m", "n", "0", "|>", "9", "|2", "$", "7", "u", "\\/", "w", "x", "'/", "2"] Five, + /// Sixth Leet level: ["@", "|3", "c", "|)", "&", "|=", "6", "#", "!", ",|", "|<", "1", "m", "n", "0", "|>", "9", "|2", "$", "7", "u", "\\/", "w", "x", "'/", "2"] Six, + /// Seventh Leet level: ["@", "|3", "[", "|)", "&", "|=", "6", "#", "!", ",|", "|<", "1", "^^", "^/", "0", "|*", "9", "|2", "5", "7", "(_)", "\\/", "\\/\\/", "><", "'/", "2"] Seven, + /// Eighth Leet level: ["@", "8", "(", "|)", "&", "|=", "6", "|-|", "!", "_|", "|(", "1", "|\\/|", "|\\|", "()", "|>", "(,)", "|2", "$", "|", "|_|", "\\/", "\\^/", ")(", "'/", "\"/_"] Eight, + /// Ninth Leet level: ["@", "8", "(", "|)", "&", "|=", "6", "|-|", "!", "_|", "|{", "|_", "/\\/\\", "|\\|", "()", "|>", "(,)", "|2", "$", "|", "|_|", "\\/", "\\^/", ")(", "'/", "\"/_"] Nine, } @@ -129,37 +195,102 @@ pub enum LeetLevel { /// The `HmacMd5Version06` is similarly ignoring the 
supplied characters and using hexadecimal numbers as output. #[derive(Debug,Clone, Copy)] pub enum HashAlgorithm { + /// Regular Md4 PasswordMaker Pro setting. Md4, + /// HMAC Md4 PasswordMaker Pro setting. Encodes input as UTF-16 and discards upper byte (just as PasswordMaker Pro does for HMAC). HmacMd4, + /// Regular Md5 PasswordMaker Pro setting. Md5, + /// Md5 as computed by PasswordMaker Pro version 0.6. Encodes input as UTF-16 and discards upper byte and outputs MD5 as hex number. Md5Version06, + /// HMAC Md5 PasswordMaker Pro setting. Encodes input as UTF-16 and discards upper byte (just as PasswordMaker Pro does for HMAC). HmacMd5, + /// HMAC Md5 as computed by PasswordMaker Pro version 0.6. Encodes input as UTF-16 and discards upper byte and outputs MD5 as hex number. HmacMd5Version06, + /// Regular Sha1 PasswordMaker Pro setting. Sha1, + /// HMAC Sha1 PasswordMaker Pro setting. Encodes input as UTF-16 and discards upper byte (just as PasswordMaker Pro does for HMAC). HmacSha1, + /// Regular Sha256 PasswordMaker Pro setting. Sha256, + /// HMAC Sha256 PasswordMaker Pro setting. Encodes input as UTF-16 and discards upper byte (just as PasswordMaker Pro does for HMAC). HmacSha256, + /// Regular Ripemd160 PasswordMaker Pro setting. Ripemd160, + /// HMAC Ripemd160 PasswordMaker Pro setting. Encodes input as UTF-16 and discards upper byte (just as PasswordMaker Pro does for HMAC). HmacRipemd160, } -/// When the leet replacement shown in leet.rs is applied. It is always applied to each password part when the required password length +/// When the Leet replacement shown in leet.rs is applied. +/// If Leet is enabled, the input will be converted to lower case. +/// It is always applied to each password part when the required password length /// is longer than the length obtained by computing a single hash. This is important if the input data or output charset contains certain /// characters where the lower case representation depends on context (e.g. 'Σ'). 
#[derive(Debug,Clone, Copy)] pub enum UseLeetWhenGenerating { + /// Do not apply Leet on input or output. NotAtAll, + /// Apply Leet on the input before computing a password part. Before { + /// The Leet level to apply to the input. level : LeetLevel, }, + /// Apply Leet on the generated password-part. Beware that this will force the password to lower-case characters. After { + /// The Leet level to apply to the generated password parts. level : LeetLevel, }, + /// Apply Leet both, to the input for the hasher, and the generated password parts. Beware that this will force the password to lower-case characters. BeforeAndAfter { + /// The Leet level to apply to both, input and generated password parts. level : LeetLevel, }, } +/// Settings for the parsing of the user's input URL. +/// This is used to generate the `data` parameter for [`PasswordMaker`]. +#[allow(clippy::struct_excessive_bools)] +#[derive(Debug, Clone)] +pub struct UrlParsing { + use_protocol : ProtocolUsageMode, + use_userinfo : bool, + use_subdomains : bool, + use_domain : bool, + use_port_path : bool, +} + +impl UrlParsing { + /// Creates a new `UrlParsing` instance with the given settings. + pub fn new( + use_protocol : ProtocolUsageMode, + use_userinfo : bool, + use_subdomains : bool, + use_domain : bool, + use_port_path : bool, + ) -> Self{ + UrlParsing{ use_protocol, use_userinfo, use_subdomains, use_domain, use_port_path, } + } + + /// Parses an input string, applying the settings in `self`, and generates a string suitable for + /// the `data` parameter of [`PasswordMaker`] + pub fn parse(&self, input : &str) -> String{ + self.make_used_text_from_url(input) + } +} + +/// The "Use Protocol" checkbox in PasswordMaker Pro Javascript Edition has some weird behaviour, that's probably a bug. +/// This enum lets you select how to handle the case that the user wants to use the Protocol, but the input string doesn't contain one. 
+#[derive(Debug, Clone, Copy)] +pub enum ProtocolUsageMode{ + /// The protocol part of the URI is not used in the output. + Ignored, + /// The protocol part of the URI is used in the output, if it's non-empty in the input. Otherwise it isn't. + Used, + /// The protocol part of the URI is used in the output, if it's non-empty in the input. Otherwise the string "undefined" is used in the output. + /// This mirrors behaviour of the PasswordMaker Pro Javascript Edition. + UsedWithUndefinedIfEmpty, +} + /// Error returned if the supplied input did not meet expectations. /// The two "missing" variants are self-explanatory, but the `InsufficientCharset` might need some explanation: /// `InsufficientCharset` means that the output character set does not contain at least two grapheme clusters. @@ -167,8 +298,11 @@ pub enum UseLeetWhenGenerating { /// any number of grapheme clusters lower than 2 forms a nonsensical input. There simply is no base-1 or base-0 number system. #[derive(Debug, Clone, Copy)] pub enum GenerationError { + /// Password generation failed, because the user did not supply a master password. MissingMasterPassword, + /// Password generation failed, because the user did not supply a text-to-use. MissingTextToUse, + /// Password generation failed, because the character set supplied by the user did not contain at least 2 grapheme clusters. 
InsufficientCharset } diff --git a/src/passwordmaker/hmac.rs b/src/passwordmaker/hmac.rs new file mode 100644 index 0000000..4c9d6aa --- /dev/null +++ b/src/passwordmaker/hmac.rs @@ -0,0 +1,58 @@ +use crate::Hasher; + +pub(super) fn hmac<T, K, M>(key : K, data : M) -> T::Output + where T : Hasher, + T::Output : AsRef<[u8]>, + K : Iterator<Item=u8> + Clone, + M : Iterator<Item=u8>, +{ + let key_len = key.clone().count(); + let key = if key_len > 64 { + KeyOrHash::from_hash(T::hash(&key.collect::<Vec<_>>())) + } else { + KeyOrHash::from_key(key) + }; + let key = key.chain(std::iter::repeat(0)); //if key[i] does not exist, use 0 instead. + + let mut inner_pad = [0u8;64]; + let mut outer_pad = [0u8;64]; + + let pads = inner_pad.iter_mut().zip(outer_pad.iter_mut()); + for ((i,o),k) in pads.zip(key) { + *i = k ^ 0x36; + *o = k ^ 0x5C; + } + + let hash = T::hash(&inner_pad.iter().copied().chain(data).collect::<Vec<_>>()); + T::hash(&outer_pad.iter().chain(hash.as_ref().iter()).copied().collect::<Vec<_>>()) +} + +enum KeyOrHash<K: Iterator<Item=u8>, H: AsRef<[u8]>> { + Key(K), + Hash{ + hash : H, + idx : usize + } +} + +impl<K: Iterator<Item=u8>, H: AsRef<[u8]>> KeyOrHash<K, H>{ + fn from_key(key : K) -> Self { + Self::Key(key) + } + fn from_hash(hash : H) -> Self { + Self::Hash { hash, idx: 0 } + } +} + +impl<K: Iterator<Item=u8>, H: AsRef<[u8]>> Iterator for KeyOrHash<K, H>{ + type Item = u8; + fn next(&mut self) -> Option<Self::Item> { + match self { + KeyOrHash::Key(k) => k.next(), + KeyOrHash::Hash { hash: owned, idx } => { + *idx += 1; + owned.as_ref().get(*idx-1).copied() + }, + } + } +}
\ No newline at end of file diff --git a/src/passwordmaker/mod.rs b/src/passwordmaker/mod.rs index 4874758..eb39c9e 100644 --- a/src/passwordmaker/mod.rs +++ b/src/passwordmaker/mod.rs @@ -10,6 +10,7 @@ use super::Hasher; mod remainders; mod remainders_impl; mod grapheme; +mod hmac; pub(crate) mod leet; impl<'y, H : super::HasherList> super::PasswordMaker<'y, H>{ @@ -18,8 +19,8 @@ impl<'y, H : super::HasherList> super::PasswordMaker<'y, H>{ } pub(super) fn generate_password_verified_input(self) -> String { - let modified_data = self.data.to_owned() + self.username + self.modifier; - let key = self.key.to_owned(); + let modified_data = self.data + self.username + self.modifier; + let key = self.key; let get_modified_key = move |i : usize| { if i == 0 {key.clone()} else {key.clone() + "\n" + &i.to_string()}}; //In Passwordmaker Pro, leet is applied on a per-password-part basis. This means that if a password part ends in an upper-case Sigma, @@ -111,7 +112,7 @@ impl<'y, H : super::HasherList> super::PasswordMaker<'y, H>{ let data = leetified_data.as_deref().unwrap_or(data); let key = yeet_upper_bytes(&key); let data = yeet_upper_bytes(data); - let hash = hmac::<H::MD5,_,_>(key, data); + let hash = hmac::hmac::<H::MD5,_,_>(key, data); let hash_as_integer = u128::from_be_bytes(hash); let grapheme_indices : Vec<_> = hash_as_integer.calc_remainders(characters.len() as u128).map(|llll| llll as usize).collect(); let grapheme_indices = yoink_additional_graphemes_for_06_if_needed(grapheme_indices); @@ -225,7 +226,7 @@ fn modern_hmac_to_grapheme_indices<T, F, C, Z, D, U>(key : &str, data: &str, to_ { let key = yeet_upper_bytes(key); let data = yeet_upper_bytes(data); - to_dividend(hmac::<T,_,_>(key, data)).calc_remainders(divisor).map(to_usize).collect() + to_dividend(hmac::hmac::<T,_,_>(key, data)).calc_remainders(divisor).map(to_usize).collect() } fn modern_message_to_grapheme_indices<T, F, C, Z, D, U>(data: &str, to_dividend : F, divisor : D, to_usize : U) -> Vec<usize> @@ 
-353,61 +354,4 @@ fn yeet_upper_bytes(input : &str) -> impl Iterator<Item=u8> + Clone + '_ { fn yoink_additional_graphemes_for_06_if_needed(mut input : Vec<usize>) -> Vec<usize> { input.resize(32, 0); input -} - -fn hmac<T, K, M>(key : K, data : M) -> T::Output - where T : Hasher, - T::Output : AsRef<[u8]>, - K : Iterator<Item=u8> + Clone, - M : Iterator<Item=u8>, -{ - let key_len = key.clone().count(); - let key = if key_len > 64 { - KeyOrHash::from_hash(T::hash(&key.collect::<Vec<_>>())) - } else { - KeyOrHash::from_key(key) - }; - let key = key.chain(std::iter::repeat(0)); //if key[i] does not exist, use 0 instead. - - let mut inner_pad = [0u8;64]; - let mut outer_pad = [0u8;64]; - - let pads = inner_pad.iter_mut().zip(outer_pad.iter_mut()); - for ((i,o),k) in pads.zip(key) { - *i = k ^ 0x36; - *o = k ^ 0x5C; - } - - let hash = T::hash(&inner_pad.iter().copied().chain(data).collect::<Vec<_>>()); - T::hash(&outer_pad.iter().chain(hash.as_ref().iter()).copied().collect::<Vec<_>>()) -} - -enum KeyOrHash<K: Iterator<Item=u8>, H: AsRef<[u8]>> { - Key(K), - Hash{ - hash : H, - idx : usize - } -} - -impl<K: Iterator<Item=u8>, H: AsRef<[u8]>> KeyOrHash<K, H>{ - fn from_key(key : K) -> Self { - Self::Key(key) - } - fn from_hash(hash : H) -> Self { - Self::Hash { hash, idx: 0 } - } -} - -impl<K: Iterator<Item=u8>, H: AsRef<[u8]>> Iterator for KeyOrHash<K, H>{ - type Item = u8; - fn next(&mut self) -> Option<Self::Item> { - match self { - KeyOrHash::Key(k) => k.next(), - KeyOrHash::Hash { hash: owned, idx } => { - *idx += 1; - owned.as_ref().get(*idx-1).copied() - }, - } - } }
\ No newline at end of file diff --git a/src/url_parsing/mod.rs b/src/url_parsing/mod.rs new file mode 100644 index 0000000..e0a95ad --- /dev/null +++ b/src/url_parsing/mod.rs @@ -0,0 +1,508 @@ +use crate::UrlParsing; +use std::ops::Deref; +use std::ops::Add; + +impl UrlParsing { + /// Computes a `used_text` from an input URL according to the passed in `UrlParsing` object. + /// Aims to be kinda compatible to Passwordmaker Pro. + pub(super) fn make_used_text_from_url(&self, input : &str, ) -> String { + parse_url(input).filter_by_settings(self).recombine() + } + + fn is_protocol_used(&self) -> bool{ + match self.use_protocol{ + crate::ProtocolUsageMode::Ignored => false, + crate::ProtocolUsageMode::Used + | crate::ProtocolUsageMode::UsedWithUndefinedIfEmpty + => true, + } + } + fn use_protocol_undefined_fallback(&self) -> bool{ + match self.use_protocol{ + crate::ProtocolUsageMode::Ignored + | crate::ProtocolUsageMode::Used + => false, + crate::ProtocolUsageMode::UsedWithUndefinedIfEmpty => true, + } + } +} + +#[cfg_attr(test, derive(PartialEq, Debug, Clone))] +struct UrlParts<'s> { + protocol : &'s str, + userinfo : &'s str, //Treating this field separately is an addition to the functionality offered by PasswordMaker Pro + subdomain : &'s str, //this is not part of the official URI spec. But PasswordMaker Pro uses it. + domain: &'s str, + port: &'s str, //this would not need to be separated from path_query_fragment, but it's easier to parse if it's separate. + path_query_fragment: &'s str //we don't need to separate those. Passwordmaker doesn't either. +} + +impl<'s> UrlParts<'s> { + fn filter_by_settings(self, settings : &UrlParsing) -> UsedUrlParts<'s>{ + let has_protocol = settings.is_protocol_used() && !self.protocol.is_empty(); + UsedUrlParts{ + protocol: //PasswordMaker Pro compatibility: Protocol is handled _weird_... 
+ if has_protocol { self.protocol } + else if settings.use_protocol_undefined_fallback() { "undefined" } + else { <&str>::default() }, + protocol_separator: if has_protocol { "://" } else { <&str>::default() }, //this is again some PasswordMaker Pro weirdness... + userinfo: if settings.use_userinfo { self.userinfo } else { <&str>::default() }, + subdomain: if settings.use_subdomains { self.subdomain } else { <&str>::default() }, + domain: if settings.use_domain { self.domain } else { <&str>::default() }, + port: if settings.use_port_path { self.port } else { <&str>::default() }, + path_query_fragment: if settings.use_port_path { self.path_query_fragment } else { <&str>::default() }, + } + } +} + +#[cfg_attr(test, derive(PartialEq, Debug))] +struct UsedUrlParts<'s> { + protocol : &'s str, + protocol_separator : &'s str, + userinfo : &'s str, //Treating this field separately is an addition to the functionality offered by PasswordMaker Pro + subdomain : &'s str, //this is not part of the official URI spec. But PasswordMaker Pro uses it. + domain: &'s str, + port: &'s str, //this would not need to be separated from path_query_fragment, but it's easier to parse if it's separate. + path_query_fragment: &'s str //we don't need to separate those. Passwordmaker doesn't either. +} + +impl<'s> UsedUrlParts<'s> { + #[allow(clippy::doc_markdown)] + /// Tries to assemble a string in a way that's at least somehow compatible to PasswordMaker Pro. + /// This prioritizes ease of use ("what the user expects") over correct URI parsing. + fn recombine(self) -> String { + //matching would need 64 arms... Too much work, soooo, a couple of ifs and less sanity instead. 
+ let has_userinfo = !self.userinfo.is_empty(); + let has_subdomain = !self.subdomain.is_empty(); + let has_domain = !self.domain.is_empty(); + let has_port = !self.port.is_empty(); + let has_path_query_fragment = !self.path_query_fragment.is_empty(); + + //by doing all logic on &str, we save allocations to the very last moment. Also, the syntax is more readable. + let parts = [ + self.protocol, + self.protocol_separator, + self.userinfo, + if has_userinfo && (has_domain || has_subdomain || has_port|| has_path_query_fragment) { "@" } else { <&str>::default() }, + self.subdomain, + if has_subdomain && has_domain { "." } else { <&str>::default() }, + self.domain, + if has_port && (has_userinfo || has_domain || has_subdomain) { ":" } else { <&str>::default() }, + self.port, + self.path_query_fragment, + ]; + + let needed_size = parts.iter().map(Deref::deref).map(<str>::len).sum(); + parts.iter().map(Deref::deref).fold(String::with_capacity(needed_size), String::add) + } +} + +#[allow(clippy::doc_markdown)] +/// Parses the input URI in a way that resembles the behaviour of PasswordMaker Pro. This is intentionally not following the URI standard. +/// It prioritizes ease-of-use over strictly following the URI standard. +/// The idea here is that users tend to input strings of the form "www.somedomain.com", which is not a valid URI (authority is not optional). +/// Input of this form should still work though, in order not to confuse users. +fn parse_url(input : &str) -> UrlParts{ + let maybe_protocol = input.split_once(':'); + let has_protocol = maybe_protocol.is_some(); + let (protocol, rest) = maybe_protocol.unwrap_or((<&str>::default(), input)); + let removed_authority_marker = rest.strip_prefix("//"); + let has_authority = removed_authority_marker.is_some(); + let rest = removed_authority_marker.unwrap_or(rest); + + //Authority stops at first / character. Or, if none encountered, at end of input. Slash is part of path. 
+ //If there is a protocol, but no authority, we must treat everything after the initial ':' as path though. + let first_character_of_path = if has_protocol && !has_authority {Some(0)} else {rest.find('/')}; + let (authority, path_query_fragment) = first_character_of_path.map_or((rest, <&str>::default()),|mid| rest.split_at(mid)); + //must split authority at '@' characters. Otherwise ':' is ambiguous. + let (userinfo, host_and_port) = authority.split_once('@').unwrap_or((<&str>::default(), authority)); + let (address, port) = host_and_port.split_once(':').unwrap_or((host_and_port, <&str>::default())); + let separator_between_subdom_and_domain = address.rmatch_indices('.').nth(1); + let (subdomain, domain_with_leading_dot) = separator_between_subdom_and_domain.map_or((<&str>::default(), address), |(i, _)| address.split_at(i)); + let domain = domain_with_leading_dot.strip_prefix('.').unwrap_or(domain_with_leading_dot); + UrlParts{protocol, userinfo, subdomain, domain, port, path_query_fragment} +} + +#[cfg(test)] +mod url_parsing_tests { + use crate::ProtocolUsageMode; + + use super::*; + + /// Just tries to split some example urls and checks if the result is as expected. This tests against PasswordMaker Pro behaviour, not proper URI format. 
+ #[test] + fn uri_splitting_test_full_uri(){ + let input = "http://anon:12345@some.subdomain.of.some.domain.com:8080/some/path/with?query&and#fragment"; + let expected = UrlParts{ + protocol: "http", + userinfo: "anon:12345", + subdomain: "some.subdomain.of.some", + domain: "domain.com", + port: "8080", + path_query_fragment: "/some/path/with?query&and#fragment", + }; + let result = parse_url(input); + assert_eq!(result, expected); + } + #[test] + fn uri_splitting_test_no_userinfo(){ + let input = "http://some.subdomain.of.some.domain.com:8080/some/path/with?query&and#fragment"; + let expected = UrlParts{ + protocol: "http", + userinfo: <&str>::default(), + subdomain: "some.subdomain.of.some", + domain: "domain.com", + port: "8080", + path_query_fragment: "/some/path/with?query&and#fragment", + }; + let result = parse_url(input); + assert_eq!(result, expected); + } + #[test] + fn uri_splitting_test_no_port(){ + let input = "http://anon:12345@some.subdomain.of.some.domain.com/some/path/with?query&and#fragment"; + let expected = UrlParts{ + protocol: "http", + userinfo: "anon:12345", + subdomain: "some.subdomain.of.some", + domain: "domain.com", + port: <&str>::default(), + path_query_fragment: "/some/path/with?query&and#fragment", + }; + let result = parse_url(input); + assert_eq!(result, expected); + } + #[test] + fn uri_splitting_test_no_domain(){ + let input = "http://anon:12345@:8080/some/path/with?query&and#fragment"; + let expected = UrlParts{ + protocol: "http", + userinfo: "anon:12345", + subdomain: <&str>::default(), + domain: <&str>::default(), + port: "8080", + path_query_fragment: "/some/path/with?query&and#fragment", + }; + let result = parse_url(input); + assert_eq!(result, expected); + } + #[test] + fn uri_splitting_test_no_domain_no_port(){ + let input = "http://anon:12345@/some/path/with?query&and#fragment"; + let expected = UrlParts{ + protocol: "http", + userinfo: "anon:12345", + subdomain: <&str>::default(), + domain: <&str>::default(), + port: 
<&str>::default(), + path_query_fragment: "/some/path/with?query&and#fragment", + }; + let result = parse_url(input); + assert_eq!(result, expected); + } + #[test] + fn uri_splitting_test_empty_path(){ + let input = "http://anon:12345@some.subdomain.of.some.domain.com:8080"; + let expected = UrlParts{ + protocol: "http", + userinfo: "anon:12345", + subdomain: "some.subdomain.of.some", + domain: "domain.com", + port: "8080", + path_query_fragment: <&str>::default(), + }; + let result = parse_url(input); + assert_eq!(result, expected); + } + #[test] + fn uri_splitting_test_only_protocol_and_path(){ + let input = "http:some/path/"; + let expected = UrlParts{ + protocol: "http", + userinfo: <&str>::default(), + subdomain: <&str>::default(), + domain: <&str>::default(), + port: <&str>::default(), + path_query_fragment: "some/path/", + }; + let result = parse_url(input); + assert_eq!(result, expected); + } + + /// This triggers me. It should not work (scheme isn't optional), but users would miss it. + /// Password and Port are not included too, because those would be (correctly) identified as schemes. 
+ #[test] + fn uri_splitting_missing_protocol(){ + let input = "anon@some.subdomain.of.some.domain.com/some/path/with?query&and#fragment"; + let expected = UrlParts{ + protocol: <&str>::default(), + userinfo: "anon", + subdomain: "some.subdomain.of.some", + domain: "domain.com", + port: <&str>::default(), + path_query_fragment: "/some/path/with?query&and#fragment", + }; + let result = parse_url(input); + assert_eq!(result, expected); + } + #[test] + fn uri_splitting_just_domain_and_path(){ + let input = "some.subdomain.of.some.domain.com/some/path/with?query&and#fragment"; + let expected = UrlParts{ + protocol: <&str>::default(), + userinfo: <&str>::default(), + subdomain: "some.subdomain.of.some", + domain: "domain.com", + port: <&str>::default(), + path_query_fragment: "/some/path/with?query&and#fragment", + }; + let result = parse_url(input); + assert_eq!(result, expected); + } + #[test] + fn uri_splitting_just_domain_and_subdomain(){ + let input = "some.subdomain.of.some.domain.com"; + let expected = UrlParts{ + protocol: <&str>::default(), + userinfo: <&str>::default(), + subdomain: "some.subdomain.of.some", + domain: "domain.com", + port: <&str>::default(), + path_query_fragment: <&str>::default(), + }; + let result = parse_url(input); + assert_eq!(result, expected); + } + #[test] + fn uri_splitting_just_domain(){ + let input = "domain.com"; + let expected = UrlParts{ + protocol: <&str>::default(), + userinfo: <&str>::default(), + subdomain: <&str>::default(), + domain: "domain.com", + port: <&str>::default(), + path_query_fragment: <&str>::default(), + }; + let result = parse_url(input); + assert_eq!(result, expected); + } + #[test] + fn uri_splitting_only_protocol(){ + let input = "ftp:"; + let expected = UrlParts{ + protocol: "ftp", + userinfo: <&str>::default(), + subdomain: <&str>::default(), + domain: <&str>::default(), + port: <&str>::default(), + path_query_fragment: <&str>::default(), + }; + let result = parse_url(input); + assert_eq!(result, 
expected); + } + + // Above tests are incomplete. I mean, there are 64 combinations... And then there could be errors... + // Soo, let's just pretend it's fine, and if there are bugs, add the specific buggy input. + //----------------------------------------------------------------------------- + // Reassembly tests + // Again our valid input range is 64 values... And again we just test some samples that are known to be weird. + // For everything else, let's wait for bug reports. + + /// However, for settings application, every combination can be tested. + #[test] + fn apply_settings_to_url_parts_no_undefined_protocol(){ + for i in 0..64 { + let settings = UrlParsing { + use_protocol: if i%2 == 0 { ProtocolUsageMode::Used } else { ProtocolUsageMode::Ignored }, + use_userinfo: (i/2)%2 == 0, + use_subdomains: (i/4)%2 == 0, + use_domain: (i/8)%2 == 0, + use_port_path: (i/16)%2 == 0, + }; + + let inputs = UrlParts { + protocol: if (i/32)%2 == 0 {"proto"} else {""}, + userinfo: "plasmic", + subdomain: "pirate", + domain: "hordes", + port: "420", + path_query_fragment: "under/blackened#banners", + }; + + let output = inputs.clone().filter_by_settings(&settings); + if settings.is_protocol_used() { assert_eq!(output.protocol, inputs.protocol) } else { assert_eq!(output.protocol, "") }; + if settings.is_protocol_used() && !inputs.protocol.is_empty() { assert_eq!(output.protocol_separator, "://") } else { assert_eq!(output.protocol_separator, "") }; + if settings.use_userinfo { assert_eq!(output.userinfo, inputs.userinfo) } else { assert_eq!(output.userinfo, "")}; + if settings.use_subdomains { assert_eq!(output.subdomain, inputs.subdomain) } else { assert_eq!(output.subdomain, "")}; + if settings.use_domain { assert_eq!(output.domain, inputs.domain) } else { assert_eq!(output.domain, "")}; + if settings.use_port_path { assert_eq!(output.port, inputs.port) } else { assert_eq!(output.port, "")}; + if settings.use_port_path { assert_eq!(output.path_query_fragment, 
inputs.path_query_fragment) } else { assert_eq!(output.path_query_fragment, "")}; + } + } + #[test] + fn apply_settings_to_url_parts_undefined_protocol(){ + for i in 0..64 { + let settings = UrlParsing { + use_protocol: if i%2 == 0 { ProtocolUsageMode::UsedWithUndefinedIfEmpty } else { ProtocolUsageMode::Ignored }, + use_userinfo: (i/2)%2 == 0, + use_subdomains: (i/4)%2 == 0, + use_domain: (i/8)%2 == 0, + use_port_path: (i/16)%2 == 0, + }; + + let inputs = UrlParts { + protocol: if (i/32)%2 == 0 {"proto"} else {""}, + userinfo: "plasmic", + subdomain: "pirate", + domain: "hordes", + port: "420", + path_query_fragment: "under/blackened#banners", + }; + + let output = inputs.clone().filter_by_settings(&settings); + if settings.is_protocol_used() { + if !inputs.protocol.is_empty() { + assert_eq!(output.protocol, inputs.protocol) + } else { + assert_eq!(output.protocol, "undefined") + } + } else { + assert_eq!(output.protocol, "") + }; + if settings.is_protocol_used() && !inputs.protocol.is_empty() { assert_eq!(output.protocol_separator, "://") } else { assert_eq!(output.protocol_separator, "") }; + if settings.use_userinfo { assert_eq!(output.userinfo, inputs.userinfo) } else { assert_eq!(output.userinfo, "")}; + if settings.use_subdomains { assert_eq!(output.subdomain, inputs.subdomain) } else { assert_eq!(output.subdomain, "")}; + if settings.use_domain { assert_eq!(output.domain, inputs.domain) } else { assert_eq!(output.domain, "")}; + if settings.use_port_path { assert_eq!(output.port, inputs.port) } else { assert_eq!(output.port, "")}; + if settings.use_port_path { assert_eq!(output.path_query_fragment, inputs.path_query_fragment) } else { assert_eq!(output.path_query_fragment, "")}; + } + } + + #[test] + fn recombine_full_url_test() { + let input = UsedUrlParts{ + protocol: "xmpp", + protocol_separator: "://", + userinfo: "horst:12345", + subdomain: "www", + domain: "example.com", + port: "8080", + path_query_fragment: "/some/path", + }; + let result = 
input.recombine(); + assert_eq!(result, "xmpp://horst:12345@www.example.com:8080/some/path"); + } + #[test] + fn recombine_user_but_no_subdomain() { + let input = UsedUrlParts{ + protocol: "xmpp", + protocol_separator: "://", + userinfo: "horst:12345", + subdomain: <&str>::default(), + domain: "example.com", + port: "8080", + path_query_fragment: "/some/path", + }; + let result = input.recombine(); + assert_eq!(result, "xmpp://horst:12345@example.com:8080/some/path"); + } + #[test] + fn recombine_no_user_but_subdomain() { + let input = UsedUrlParts{ + protocol: "xmpp", + protocol_separator: "://", + userinfo: <&str>::default(), + subdomain: "w3", + domain: "example.com", + port: "8080", + path_query_fragment: "/some/path", + }; + let result = input.recombine(); + assert_eq!(result, "xmpp://w3.example.com:8080/some/path"); + } + #[test] + fn recombine_no_user_no_subdomain() { + let input = UsedUrlParts{ + protocol: "xmpp", + protocol_separator: "://", + userinfo: <&str>::default(), + subdomain: <&str>::default(), + domain: "example.com", + port: "8080", + path_query_fragment: "/some/path", + }; + let result = input.recombine(); + assert_eq!(result, "xmpp://example.com:8080/some/path"); + } + #[test] + fn recombine_no_user_no_subdomain_no_port() { + let input = UsedUrlParts{ + protocol: "xmpp", + protocol_separator: "://", + userinfo: <&str>::default(), + subdomain: <&str>::default(), + domain: "example.com", + port: <&str>::default(), + path_query_fragment: "/some/path", + }; + let result = input.recombine(); + assert_eq!(result, "xmpp://example.com/some/path"); + } + #[test] + fn recombine_undefined_protocol() { + let input = UsedUrlParts{ + protocol: "undefined", + protocol_separator: <&str>::default(), + userinfo: "horst:12345", + subdomain: "www", + domain: "example.com", + port: "8080", + path_query_fragment: "/some/path", + }; + let result = input.recombine(); + assert_eq!(result, "undefinedhorst:12345@www.example.com:8080/some/path"); + } + #[test] + fn 
recombine_undefined_protocol_no_user_no_subdomain() { + let input = UsedUrlParts{ + protocol: "undefined", + protocol_separator: <&str>::default(), + userinfo: <&str>::default(), + subdomain: <&str>::default(), + domain: "example.com", + port: <&str>::default(), + path_query_fragment: "/some/path", + }; + let result = input.recombine(); + assert_eq!(result, "undefinedexample.com/some/path"); + } + #[test] + fn recombine_no_protocol() { + let input = UsedUrlParts{ + protocol: <&str>::default(), + protocol_separator: <&str>::default(), + userinfo: <&str>::default(), + subdomain: "www", + domain: "example.com", + port: <&str>::default(), + path_query_fragment: "/some/path", + }; + let result = input.recombine(); + assert_eq!(result, "www.example.com/some/path"); + } + #[test] + fn recombine_empty_path() { + let input = UsedUrlParts{ + protocol: "xmpp", + protocol_separator: "://", + userinfo: "horst:12345", + subdomain: "www", + domain: "example.com", + port: "8080", + path_query_fragment: <&str>::default(), + }; + let result = input.recombine(); + assert_eq!(result, "xmpp://horst:12345@www.example.com:8080"); + } +}
\ No newline at end of file |