Browse Source

Also change MUL24x24R24 to the new format and fix rounding

Alex Voinea 2 years ago
parent
commit
70cb30208c
2 changed files with 59 additions and 51 deletions
  1. 57 48
      Firmware/speed_lookuptable.h
  2. 2 3
      Firmware/stepper.cpp

+ 57 - 48
Firmware/speed_lookuptable.h

@@ -39,61 +39,70 @@ FORCE_INLINE uint16_t MUL8x16R8(uint8_t x, uint16_t y) {
     return out;
 }
 
-// intRes = (longIn1 * longIn2) >> 24, keeping the 16 bits that land in intRes
-// uses:
-// r26 to store 0 (added with adc so that only the carry propagates)
-// r27 to store byte 2 (bits 16..23) of the 48bit result; NOTE(review): the final "lsr r27" moves bit 0 of this byte into the carry, so rounding is driven by its LSB rather than its MSB
-#define MultiU24X24toH16(intRes, longIn1, longIn2) \
-asm volatile ( \
-"clr r26 \n\t" \
-"mul %A1, %B2 \n\t" \
-"mov r27, r1 \n\t" \
-"mul %B1, %C2 \n\t" \
-"movw %A0, r0 \n\t" \
-"mul %C1, %C2 \n\t" \
-"add %B0, r0 \n\t" \
-"mul %C1, %B2 \n\t" \
-"add %A0, r0 \n\t" \
-"adc %B0, r1 \n\t" \
-"mul %A1, %C2 \n\t" \
-"add r27, r0 \n\t" \
-"adc %A0, r1 \n\t" \
-"adc %B0, r26 \n\t" \
-"mul %B1, %B2 \n\t" \
-"add r27, r0 \n\t" \
-"adc %A0, r1 \n\t" \
-"adc %B0, r26 \n\t" \
-"mul %C1, %A2 \n\t" \
-"add r27, r0 \n\t" \
-"adc %A0, r1 \n\t" \
-"adc %B0, r26 \n\t" \
-"mul %B1, %A2 \n\t" \
-"add r27, r1 \n\t" \
-"adc %A0, r26 \n\t" \
-"adc %B0, r26 \n\t" \
-"lsr r27 \n\t" \
-"adc %A0, r26 \n\t" \
-"adc %B0, r26 \n\t" \
-"clr r1 \n\t" \
-: \
-"=&r" (intRes) \
-: \
-"d" (longIn1), \
-"d" (longIn2) \
-: \
-"r26" , "r27" \
-)
+// Returns (uint16_t)((x * y) >> 24), i.e. bytes 3..4 of the 48-bit product, rounded up when bit 7 of the accumulated byte 2 is set. A1*A2 and the low bytes of A1*B2 / B1*A2 are never summed, so byte 2 (and thus the rounding decision) is an approximation.
+FORCE_INLINE uint16_t MUL24x24R24(__uint24 x, __uint24 y) {
+    uint16_t out;
+    __asm__ (
+    // %0 out
+    // %1 x
+    // %2 y
+    // uint8_t: %An or %n
+    // uint16_t: %Bn %An
+    // __uint24: %Cn %Bn %An
+    // uint32_t: %Dn %Cn %Bn %An
+    //
+    // Product layout (most significant byte on the left, "--" = not kept):
+    //          C1 B1 A1 *
+    //          C2 B2 A2
+    //-------------------
+    // -- B0 A0 RR -- --     (RR = r27 = byte 2, used only for rounding)
+    "clr r26 \n\t" // r26 = 0, so "adc reg, r26" adds just the carry
+    "mul %A1, %B2 \n\t" // A1*B2 spans bytes 1..2
+    "mov r27, r1 \n\t" // keep only its high byte as the start of byte 2
+    "mul %B1, %C2 \n\t" // B1*C2 spans bytes 3..4
+    "movw %A0, r0 \n\t" // initialise out (B0:A0) with it
+    "mul %C1, %C2 \n\t" // C1*C2 spans bytes 4..5
+    "add %B0, r0 \n\t" // only its low byte fits in byte 4; r1 (byte 5) is dropped
+    "mul %C1, %B2 \n\t" // C1*B2 spans bytes 3..4
+    "add %A0, r0 \n\t"
+    "adc %B0, r1 \n\t"
+    "mul %A1, %C2 \n\t" // A1*C2 spans bytes 2..3
+    "add r27, r0 \n\t"
+    "adc %A0, r1 \n\t"
+    "adc %B0, r26 \n\t" // propagate the carry into byte 4
+    "mul %B1, %B2 \n\t" // B1*B2 spans bytes 2..3
+    "add r27, r0 \n\t"
+    "adc %A0, r1 \n\t"
+    "adc %B0, r26 \n\t"
+    "mul %C1, %A2 \n\t" // C1*A2 spans bytes 2..3
+    "add r27, r0 \n\t"
+    "adc %A0, r1 \n\t"
+    "adc %B0, r26 \n\t"
+    "mul %B1, %A2 \n\t" // B1*A2 spans bytes 1..2; only its high byte (r1) is used
+    "add r27, r1 \n\t"
+    "adc %A0, r26 \n\t"
+    "adc %B0, r26 \n\t"
+    "lsl r27 \n\t" // carry = bit 7 of byte 2 (the most significant discarded bit)
+    "adc %A0, r26 \n\t" // rounding: add that carry into the result
+    "adc %B0, r26 \n\t"
+    "clr r1 \n\t" // leave r1 zeroed again, since every mul overwrote r1:r0
+    : "=&r" (out)
+    : "r" (x), "r" (y)
+    : "r0", "r1", "r26" , "r27" //clobbers: r0/r1 receive every mul result (r1 is cleared again before leaving); r26 is cleared locally and used as a constant zero for carry propagation; r27 accumulates byte 2 of the product, which only feeds the rounding step.
+    );
+    return out;
+}
 
 #else //_NO_ASM
 
-static inline void MultiU16X8toH16(uint16_t& intRes, uint8_t& charIn1, uint16_t& intIn2)
+FORCE_INLINE uint16_t MUL8x16R8(uint8_t charIn1, uint16_t intIn2) // _NO_ASM fallback: returns (charIn1 * intIn2) >> 8 by value instead of writing through a reference
 {
-    intRes = ((uint32_t)charIn1 * (uint32_t)intIn2) >> 8;
+    return ((uint32_t)charIn1 * (uint32_t)intIn2) >> 8; // operands widened to 32 bits before multiplying; the low byte of the product is dropped without rounding
 }
 
-static inline void MultiU24X24toH16(uint16_t& intRes, uint32_t& longIn1, uint32_t& longIn2)
+FORCE_INLINE uint16_t MUL24x24R24(uint32_t longIn1, uint32_t longIn2) // _NO_ASM fallback: returns (uint16_t)((longIn1 * longIn2) >> 24), result passed by value instead of through a reference
 {
-    intRes = ((uint64_t)longIn1 * (uint64_t)longIn2) >> 24;
+    return ((uint64_t)longIn1 * (uint64_t)longIn2) >> 24; // NOTE(review): this truncates, whereas the asm MUL24x24R24 rounds on the top discarded bit ("lsl r27" + adc), so the two builds can differ by 1 - confirm whether that is intended
 }
 
 #endif //_NO_ASM

+ 2 - 3
Firmware/stepper.cpp

@@ -818,7 +818,7 @@ FORCE_INLINE void isr() {
       //WRITE_NC(LOGIC_ANALYZER_CH1, true);
       if (step_events_completed.wide <= current_block->accelerate_until) {
         // v = t * a   ->   acc_step_rate = acceleration_time * current_block->acceleration_rate
-        MultiU24X24toH16(acc_step_rate, acceleration_time, current_block->acceleration_rate);
+        acc_step_rate = MUL24x24R24(acceleration_time, current_block->acceleration_rate);
         acc_step_rate += uint16_t(current_block->initial_rate);
         // upper limit
         if(acc_step_rate > uint16_t(current_block->nominal_rate))
@@ -838,8 +838,7 @@ FORCE_INLINE void isr() {
 #endif
       }
       else if (step_events_completed.wide > current_block->decelerate_after) {
-        uint16_t step_rate;
-        MultiU24X24toH16(step_rate, deceleration_time, current_block->acceleration_rate);
+        uint16_t step_rate = MUL24x24R24(deceleration_time, current_block->acceleration_rate);
 
         if (step_rate > acc_step_rate) { // Check step_rate stays positive
             step_rate = uint16_t(current_block->final_rate);