From 839e1096dde8e1e934a82d1cf0c4d3a43dc69f38 Mon Sep 17 00:00:00 2001
From: Andreas Grois <andi@grois.info>
Date: Wed, 26 Oct 2022 17:59:27 +0200
Subject: Use MulAssign in BaseConversion.

Benchmarks show that MulAssign is quite a bit faster than Mul,
especially since in this case it's mathematically proven that the
multiplication cannot overflow.
Also, removed skipping of leading zeros in long division. With the
switch to multiplication after the first iteration, the chance that
there actually are any leading zeros is on the order of 2^-26.
I think at least. In any case, it's small.
---
 .../base_conversion/iterative_conversion.rs        | 20 +++++++++----
 .../iterative_conversion_impl/mod.rs               | 34 ++++++++++++++++++++--
 2 files changed, 46 insertions(+), 8 deletions(-)
diff --git a/src/passwordmaker/base_conversion/iterative_conversion.rs b/src/passwordmaker/base_conversion/iterative_conversion.rs
index 7d52e9b..710903e 100644
--- a/src/passwordmaker/base_conversion/iterative_conversion.rs
+++ b/src/passwordmaker/base_conversion/iterative_conversion.rs
@@ -9,7 +9,7 @@
 //! If you have any great idea how to improve it: Make a merge request!
 
 use std::convert::TryInto;
-use std::ops::{Mul, DivAssign};
+use std::ops::{Mul, DivAssign, MulAssign};
 use std::iter::successors;
 
 pub(crate) struct IterativeBaseConversion<V,B>{
@@ -59,10 +59,10 @@ impl<V,B> IterativeBaseConversion<V,B>
 }
 
 impl<V,B> std::iter::Iterator for IterativeBaseConversion<V,B>
-    where V : for<'a> DivAssign<&'a B> + //used between steps to go to next-lower current_base_power
+    where V : for<'a> DivAssign<&'a B> + //used in the first iteration to ensure that MulAssign cannot overflow.
               RemAssignWithQuotient+     //used to get the result of each step.
-              TryInto<B>,                //used to convert the result of each step. We _know_ this cannot fail, but requiring Into would be wrong.
-          for<'a> &'a V : Mul<&'a B, Output = Option<V>> 
+              TryInto<B>+                //used to convert the result of each step. We _know_ this cannot fail, but requiring Into would be wrong.
+              for<'a> MulAssign<&'a B>   //used instead of DivAssign after one iteration. it's faster to mul the dividend than div the divisor.
 {
     type Item = B;
 
@@ -73,7 +73,9 @@ impl<V,B> std::iter::Iterator for IterativeBaseConversion<V,B>
             let result = self.current_value.rem_assign_with_quotient(&self.current_base_power);
             
             if self.switch_to_multiplication {
-                self.current_value = (&self.current_value * &self.base).expect("Cannot overflow.");
+                //mul_assign is in principle dangerous.
+                //Since we do two rem_assign_with_quotient calls first, we can be sure that the result is always smaller than base^max_power though.
+                self.current_value *= &self.base
             } else {
                 self.current_base_power /=  &self.base;
                 self.switch_to_multiplication = true;
@@ -128,7 +130,13 @@ mod iterative_conversion_tests{
         type Output = Option<MyU128>;
         fn mul(self, rhs: &MyU128) -> Self::Output {
             self.0.checked_mul(rhs.0).map(|s| MyU128(s))
-     }
+        }
+    }
+
+    impl MulAssign<&u64> for MyU128 {
+        fn mul_assign(&mut self, rhs: &u64) {
+            self.0.mul_assign(*rhs as u128);
+        }
     }
 
     impl RemAssignWithQuotient for MyU128{
diff --git a/src/passwordmaker/base_conversion/iterative_conversion_impl/mod.rs b/src/passwordmaker/base_conversion/iterative_conversion_impl/mod.rs
index 96ca497..d3e0045 100644
--- a/src/passwordmaker/base_conversion/iterative_conversion_impl/mod.rs
+++ b/src/passwordmaker/base_conversion/iterative_conversion_impl/mod.rs
@@ -9,7 +9,7 @@ mod precomputed_constants;
 #[cfg(all(not(feature="precomputed_max_powers"),feature="precomputed_common_max_powers"))]
 mod precomputed_common_constants;
 
-use std::ops::{DivAssign, Mul};
+use std::ops::{DivAssign, Mul, MulAssign};
 use std::convert::{TryFrom, TryInto};
 use std::fmt::Display;
 use std::error::Error;
@@ -70,6 +70,12 @@ impl Mul<&SixteenBytes> for &SixteenBytes{
     }
 }
 
+impl MulAssign<&usize> for SixteenBytes {
+    fn mul_assign(&mut self, rhs: &usize) {
+        self.0 *= *rhs as u128;
+    }
+}
+
 impl PrecomputedMaxPowers<usize> for SixteenBytes{}
 
 //--------------------------------------------------------------------------------------------------------------------------------------
@@ -253,6 +259,23 @@ impl<const N : usize> Mul<&usize> for &ArbitraryBytes<N>{
     }
 }
 
+//Done separately, because "mut references not allowed in const contexts"
+impl<const N : usize> MulAssign<&usize> for ArbitraryBytes<N>{
+    fn mul_assign(&mut self, rhs: &usize) {
+        #[cfg(target_pointer_width = "64")]
+        type Long = u128;
+        #[cfg(not(target_pointer_width = "64"))]
+        type Long = u64;
+        let rhs = *rhs as Long;
+        let carry = self.0.iter_mut().rev().fold(0 as Long, |carry, current| {
+            let res = (*current as Long) * rhs + carry;
+            *current = res as u32;
+            res >> 32
+        });
+        debug_assert_eq!(carry, 0);
+    }
+}
+
 impl<const N : usize> Mul<&ArbitraryBytes<N>> for &ArbitraryBytes<N> where ArbitraryBytes<N> : for<'a> From<&'a usize> {
     type Output = Option<ArbitraryBytes<N>>;
     ///School method. I haven't tried Karatsuba, but rule of thumb is that it only gets faster at about 32 digits. We have 8 digits max.
@@ -300,7 +323,7 @@ macro_rules! make_div_assign_with_remainder {
             debug_assert!((<$t_long>::MAX >> 32) as u128 >= <$t_divisor>::MAX as u128);
 
             let divisor = *rhs as $t_long;
-            let remainder = self.0.iter_mut().skip_while(|x| **x == 0).fold(0 as $t_long,|carry, current| {
+            let remainder = self.0.iter_mut().fold(0 as $t_long,|carry, current| {
                 debug_assert_eq!(carry, carry & (<$t_divisor>::MAX as $t_long)); //carry has to be lower than divisor, and divisor is $t_divisor.
                 let carry_shifted = carry << 32;
                 let dividend = (carry_shifted) | (*current as $t_long);
@@ -800,6 +823,13 @@ mod arbitrary_bytes_tests{
         assert!(product.is_none())
     }
     #[test]
+    fn mul_assign_usize(){
+        let mut a = ArbitraryBytes::new([0x42a7bf02,0xffffffff,0xc7138bd5,0x12345678,0xabcde012]);
+        let b = 3usize;
+        a *= &b;
+        assert_eq!(a.0, [0xC7F73D08,0xFFFFFFFF,0x553AA37F,0x369D036A,0x0369A036])
+    }
+    #[test]
     fn try_into_u32_test(){
         let a = ArbitraryBytes::new([0,0,0,0,0xabcde012]);
         let b : u32 = (&a).try_into().unwrap();
-- 
cgit v1.2.3