From 839e1096dde8e1e934a82d1cf0c4d3a43dc69f38 Mon Sep 17 00:00:00 2001 From: Andreas Grois Date: Wed, 26 Oct 2022 17:59:27 +0200 Subject: Use MulAssign in BaseConversion. Benchmarks show that MulAssign is quite a bit faster than Mul, especially since in this case it's mathematically proven that the multiplication cannot overflow. Also, removed skipping of leading zeros in long division. With the switch to multiplication after the first iteration, the chance that there actually are any leading zeros is on the order of 2^-26. I think at least. In any case, it's small. --- .../iterative_conversion_impl/mod.rs | 34 ++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'src/passwordmaker/base_conversion/iterative_conversion_impl/mod.rs') diff --git a/src/passwordmaker/base_conversion/iterative_conversion_impl/mod.rs b/src/passwordmaker/base_conversion/iterative_conversion_impl/mod.rs index 96ca497..d3e0045 100644 --- a/src/passwordmaker/base_conversion/iterative_conversion_impl/mod.rs +++ b/src/passwordmaker/base_conversion/iterative_conversion_impl/mod.rs @@ -9,7 +9,7 @@ mod precomputed_constants; #[cfg(all(not(feature="precomputed_max_powers"),feature="precomputed_common_max_powers"))] mod precomputed_common_constants; -use std::ops::{DivAssign, Mul}; +use std::ops::{DivAssign, Mul, MulAssign}; use std::convert::{TryFrom, TryInto}; use std::fmt::Display; use std::error::Error; @@ -70,6 +70,12 @@ impl Mul<&SixteenBytes> for &SixteenBytes{ } } +impl MulAssign<&usize> for SixteenBytes { + fn mul_assign(&mut self, rhs: &usize) { + self.0 *= *rhs as u128; + } +} + impl PrecomputedMaxPowers for SixteenBytes{} //-------------------------------------------------------------------------------------------------------------------------------------- @@ -253,6 +259,23 @@ impl Mul<&usize> for &ArbitraryBytes{ } } +//Done separately, because "mut references not allowed in const contexts" +impl MulAssign<&usize> for ArbitraryBytes{ + fn mul_assign(&mut self, rhs: &usize) { + #[cfg(target_pointer_width = "64")] + type Long = u128; + #[cfg(not(target_pointer_width = "64"))] + type Long = u64; + let rhs = *rhs as Long; + let carry = self.0.iter_mut().rev().fold(0 as Long, |carry, current| { + let res = (*current as Long) * rhs + carry; + *current = res as u32; + res >> 32 + }); + debug_assert_eq!(carry, 0); + } +} + impl Mul<&ArbitraryBytes> for &ArbitraryBytes where ArbitraryBytes : for<'a> From<&'a usize> { type Output = Option>; ///School method. I haven't tried Karatsuba, but rule of thumb is that it only gets faster at about 32 digits. We have 8 digits max. @@ -300,7 +323,7 @@ macro_rules! make_div_assign_with_remainder { debug_assert!((<$t_long>::MAX >> 32) as u128 >= <$t_divisor>::MAX as u128); let divisor = *rhs as $t_long; - let remainder = self.0.iter_mut().skip_while(|x| **x == 0).fold(0 as $t_long,|carry, current| { + let remainder = self.0.iter_mut().fold(0 as $t_long,|carry, current| { debug_assert_eq!(carry, carry & (<$t_divisor>::MAX as $t_long)); //carry has to be lower than divisor, and divisor is $t_divisor. let carry_shifted = carry << 32; let dividend = (carry_shifted) | (*current as $t_long); @@ -800,6 +823,13 @@ mod arbitrary_bytes_tests{ assert!(product.is_none()) } #[test] + fn mul_assign_usize(){ + let mut a = ArbitraryBytes::new([0x42a7bf02,0xffffffff,0xc7138bd5,0x12345678,0xabcde012]); + let b = 3usize; + a *= &b; + assert_eq!(a.0, [0xC7F73D08,0xFFFFFFFF,0x553AA37F,0x369D036A,0x0369A036]) + } + #[test] fn try_into_u32_test(){ let a = ArbitraryBytes::new([0,0,0,0,0xabcde012]); let b : u32 = (&a).try_into().unwrap(); -- cgit v1.2.3