Hi,
Writing an MP3 decoder on the Uno M0 is only just possible, and a few critical pieces of code have to be optimized to prove that there are enough cycles to get the job done. For that reason I'm sharing these routines:
/****************************************
* Unsigned 32 x 32 -> 64 bit multiply.  *
****************************************/
// In:    r0 = Factor0, r1 = Factor1
// Out:   r0 = product bits [63:32], r1 = product bits [31:0]
//        (high word in r0 - NOT the r0=low / r1=high order the AAPCS
//        uses for a 64-bit return value)
// Clobb: r2, r3, flags.  r4 is used as scratch but saved/restored,
//        because it is a callee-saved register under the AAPCS.
//
// Method: split both factors into 16-bit halves, form the four 16x16
// partial products with MULS, then add the two cross products into the
// 64-bit result at bit 16, propagating the carry with ADCS.
UMUL:
        push    {r4}                // preserve callee-saved scratch register
        uxth    r2, r0              // r2 = Factor0 lo [15:0]
        lsrs    r0, r0, #16         // r0 = Factor0 hi [31:16]
        lsrs    r3, r1, #16         // r3 = Factor1 hi [31:16]
        uxth    r1, r1              // r1 = Factor1 lo [15:0]
        mov     r4, r1              // r4 = copy of Factor1 lo
        muls    r1, r2              // r1 = Factor1 lo * Factor0 lo  (result low word)
        muls    r4, r0              // r4 = Factor1 lo * Factor0 hi  (cross product A)
        muls    r0, r3              // r0 = Factor0 hi * Factor1 hi  (result high word)
        muls    r3, r2              // r3 = Factor1 hi * Factor0 lo  (cross product B)
        lsls    r2, r4, #16         // r2 = low  half of (A << 16)
        lsrs    r4, r4, #16         // r4 = high half of (A << 16)
        adds    r1, r2              // low  += A<<16, carry out to C
        adcs    r0, r4              // high += A>>16 + C
        lsls    r2, r3, #16         // r2 = low  half of (B << 16)
        lsrs    r3, r3, #16         // r3 = high half of (B << 16)
        adds    r1, r2              // low  += B<<16, carry out to C
        adcs    r0, r3              // high += B>>16 + C
        pop     {r4}                // restore caller's r4 (POP leaves flags untouched)
        bx      lr
/*************************************
* Signed 32 x 32 -> 64 bit multiply. *
*************************************/
// In:    r0 = Factor0 (signed), r1 = Factor1 (signed)
// Out:   r0 = product bits [63:32], r1 = product bits [31:0]
//        (high word in r0 - NOT the r0=low / r1=high order the AAPCS
//        uses for a 64-bit return value)
// Clobb: r2, r3, flags.  r4 is used as scratch but saved/restored,
//        because it is a callee-saved register under the AAPCS.
//
// Method: each factor is split into a signed high half (ASRS) and an
// unsigned low half (UXTH).  The two cross products are therefore signed
// and are sign-extended (ASRS) when added into the high word.
SMUL:
        push    {r4}                // preserve callee-saved scratch register
        uxth    r2, r0              // r2 = Factor0 lo [15:0], unsigned
        asrs    r0, r0, #16         // r0 = Factor0 hi [31:16], signed
        asrs    r3, r1, #16         // r3 = Factor1 hi [31:16], signed
        uxth    r1, r1              // r1 = Factor1 lo [15:0], unsigned
        mov     r4, r1              // r4 = copy of Factor1 lo
        muls    r1, r2              // r1 = Factor1 lo * Factor0 lo  (result low word)
        muls    r4, r0              // r4 = Factor1 lo * Factor0 hi  (signed cross product A)
        muls    r0, r3              // r0 = Factor0 hi * Factor1 hi  (result high word)
        muls    r3, r2              // r3 = Factor1 hi * Factor0 lo  (signed cross product B)
        lsls    r2, r4, #16         // r2 = low  half of (A << 16)
        asrs    r4, r4, #16         // r4 = high half of (A << 16), sign-extended
        adds    r1, r2              // low  += A<<16, carry out to C
        adcs    r0, r4              // high += A>>16 + C
        lsls    r2, r3, #16         // r2 = low  half of (B << 16)
        asrs    r3, r3, #16         // r3 = high half of (B << 16), sign-extended
        adds    r1, r2              // low  += B<<16, carry out to C
        adcs    r0, r3              // high += B>>16 + C
        pop     {r4}                // restore caller's r4 (POP leaves flags untouched)
        bx      lr
/*********************************
* Unsigned square 32^2 -> 64 bit *
*********************************/
// In:    r0 = input (unsigned)
// Out:   r0 = square bits [31:0], r1 = square bits [63:32]
// Clobb: r2, r3, flags
//
// With input = hi*2^16 + lo:
//   input^2 = hi^2 * 2^32  +  (hi*lo) * 2^17  +  lo^2
// so only three multiplies are needed; the doubled cross term is added
// at bit 17.
USQR:
        lsrs    r1, r0, #16         // r1 = hi = input[31:16]
        uxth    r0, r0              // r0 = lo = input[15:0]
        mov     r2, r0              // r2 = lo
        muls    r2, r1              // r2 = cross = lo * hi
        muls    r0, r0              // r0 = lo^2  (result low word)
        muls    r1, r1              // r1 = hi^2  (result high word)
        lsrs    r3, r2, #15         // r3 = high word of (cross << 17)
        lsls    r2, r2, #17         // r2 = low  word of (cross << 17)
        adds    r0, r2              // low  += cross<<17, carry out to C
        adcs    r1, r3              // high += cross>>15 + C
        bx      lr
/*******************************
* Signed square 32^2 -> 64 bit *
*******************************/
// In:    r0 = input (signed)
// Out:   r0 = square bits [31:0], r1 = square bits [63:32]
// Clobb: r2, r3, flags
//
// With input = hi*2^16 + lo (hi signed, lo unsigned):
//   input^2 = hi^2 * 2^32  +  (hi*lo) * 2^17  +  lo^2
// The cross term hi*lo is negative whenever the input is negative and
// lo != 0, so its contribution to the high word must be sign-extended
// (ASRS, not LSRS).
SSQR:
        asrs    r1, r0, #16         // r1 = hi = input[31:16], signed
        uxth    r0, r0              // r0 = lo = input[15:0], unsigned
        mov     r2, r1              // r2 = hi
        muls    r2, r0              // r2 = cross = hi * lo (signed)
        muls    r1, r1              // r1 = hi^2  (result high word)
        muls    r0, r0              // r0 = lo^2  (result low word)
        asrs    r3, r2, #15         // r3 = high word of (cross << 17), sign-extended
        lsls    r2, r2, #17         // r2 = low  word of (cross << 17)
        adds    r0, r2              // low  += cross<<17, carry out to C
        adcs    r1, r3              // high += cross>>15 + C
        bx      lr
/*******************************
* Unsigned 32-bit square-root. *
*******************************/
// In:    r0 = input (unsigned)
// Out:   r0 = floor(sqrt(input))
// Clobb: r1, r2, r3, flags
//
// Bit-by-bit (digit-by-digit) method: r1 walks down the even bit
// positions from 2^30 to 2^0, r2 accumulates the root, and r0 holds the
// remainder that is still to be accounted for.
USQRT:
        movs    r1, #1
        lsls    r1, r1, #30         // r1 = 0x40000000, highest power of 4 in 32 bits
        movs    r2, #0              // r2 = root so far = 0
2:
        adds    r3, r2, r1          // r3 = trial value = root + bit
        lsrs    r2, r2, #1          // root >>= 1
        cmp     r0, r3
        bcc     1f                  // remainder < trial: this result bit stays 0
        subs    r0, r0, r3          // remainder -= trial
        adds    r2, r2, r1          // set this result bit
1:
        lsrs    r1, r1, #2          // next lower power of 4; Z set when exhausted
        bne     2b
        movs    r0, r2              // return the root
        bx      lr
/*************************************
* Absolute value of a 32-bit number. *
*************************************/
// In:    r0 = signed value
// Out:   r0 = |value|  (0x80000000 is returned unchanged)
// Clobb: r1 (left holding the sign mask), flags
//
// Branch-free: with mask = value >> 31 (0 or -1),
//   |value| = (value ^ mask) - mask
FASTABS:
        asrs    r1, r0, #31         // r1 = mask: 0 if value >= 0, 0xFFFFFFFF if negative
        eors    r0, r1              // one's complement when negative, unchanged otherwise
        subs    r0, r1              // subtracting -1 adds the 1 that completes the negation
        bx      lr
/***********************
* Count Leading Zeros  *
***********************/
// In:    r0 = value
// Out:   r0 = number of leading zero bits (0..32; 32 for an input of 0)
// Clobb: r1, r2, flags
//
// Two compare-and-shift steps narrow the value down to its most
// significant non-zero byte; a 256-entry table then supplies the leading
// zero count of that byte.  r1 holds the number of zero bits above the
// byte that is finally looked up: 24, 16, 8 or 0.
CLZ:
        movs    r1, #24             // assume only the lowest byte is populated
        lsrs    r2, r0, #16         // anything in bits 31..16?
        beq     1f                  // no: keep the low half
        subs    r1, r1, #16         // yes: 16 fewer leading zeros...
        mov     r0, r2              // ...and continue with the high half
1:
        lsrs    r2, r0, #8          // anything in the upper byte of this half?
        beq     2f                  // no: keep the low byte
        subs    r1, r1, #8          // yes: 8 fewer leading zeros...
        mov     r0, r2              // ...and continue with the upper byte
2:
        adr     r2, .Lclz_table     // r2 = table base (PC-relative, no literal pool)
        ldrb    r0, [r2, r0]        // r0 = leading zeros within the byte (0..8)
        adds    r0, r0, r1          // plus the zero bits above that byte
        bx      lr

// Emit \count copies of the byte \byte.
.macro rep_byte byte,count
.rept \count
.byte \byte
.endr
.endm

// Leading-zero count of every 8-bit value, indexed by the value itself.
        .balign 4                   // ADR needs a word-aligned target in Thumb-1
.Lclz_table:
        rep_byte 8,1                /* 0x00        */
        rep_byte 7,1                /* 0x01        */
        rep_byte 6,2                /* 0x02 - 0x03 */
        rep_byte 5,4                /* 0x04 - 0x07 */
        rep_byte 4,8                /* 0x08 - 0x0F */
        rep_byte 3,16               /* 0x10 - 0x1F */
        rep_byte 2,32               /* 0x20 - 0x3F */
        rep_byte 1,64               /* 0x40 - 0x7F */
        rep_byte 0,128              /* 0x80 - 0xFF */