Problem with.... something? Working with assembly...

Hello!

I'm stuck at this problem. I have .ino and .S files, whole point is in working with TM1637 7-seg display without using library, to save space which is scarce in ATtiny13, which is my platform in this sketch.

You can ignore SETUP part, and LOOP is writting "1111" and "2222" to display alternately. Now, function which writes "2222" is identical to the routine which writes "1111", but 1111 is written well and 2222 is not. Difference is in that in "2222" registers r16, r17, r18 and r19 - one register for each digit - are used to pass value to r24 (r24 holds argument for function "TM1637_write_byte_parameter_r24_uses_r25_returns_r24").

Code is organised in the way that INO file mostly just calls functions defined in S file.

INO file:

// C:\Users\ADAS\AppData\Local\Temp\arduino_build_112814>objdump -S tm1637cpp.ino.elf > tm1637cpp.txt

// TM 1637 7-seg display test code, no library

#define TM1637_DIO_PIN PB0
#define TM1637_CLK_PIN PB1
 
extern "C" {
 // function prototypes
 void TM1637_DELAY_US();
 void TM1637_start();
 void TM1637_stop();
 void TM1637_write_segments_arguments_r16_r17_r18_r19_uses_r24();
 void TM1637_write_byte_parameter_r24_uses_r25_returns_r24();
}

void setup()
{
	asm("in	r24, 0x17");
    asm("ori r24, 0x03");
	asm("out 0x17, r24");
	
	// initialise display and set brightness
	// 0x88 is dim and increasing value to 0x8C increases brightness
	TM1637_start();
	asm("ldi r24, 0x8c");
	TM1637_write_byte_parameter_r24_uses_r25_returns_r24();
	TM1637_stop();

	// clear display
 	TM1637_start();
	asm("ldi r24, 0x40");
	TM1637_write_byte_parameter_r24_uses_r25_returns_r24();
	TM1637_stop();
	TM1637_start();
	asm("ldi r24, 0xc0");
	TM1637_write_byte_parameter_r24_uses_r25_returns_r24();
	asm("ldi r24, 0xff");
	TM1637_write_byte_parameter_r24_uses_r25_returns_r24();
	asm("ldi r24, 0xff");
	TM1637_write_byte_parameter_r24_uses_r25_returns_r24();
	asm("ldi r24, 0xff");
	TM1637_write_byte_parameter_r24_uses_r25_returns_r24();
	asm("ldi r24, 0xff");
	TM1637_write_byte_parameter_r24_uses_r25_returns_r24();
	TM1637_stop();
}

void loop()
{
	// this routine is working good, it writes "1111" on display:
	
	TM1637_start();
	asm("ldi r24, 0x40");
	TM1637_write_byte_parameter_r24_uses_r25_returns_r24();
	TM1637_stop();
	TM1637_start();
	asm("ldi r24, 0xc0");
	TM1637_write_byte_parameter_r24_uses_r25_returns_r24();
	asm("ldi r24, 0x06");
	TM1637_write_byte_parameter_r24_uses_r25_returns_r24();
	asm("ldi r24, 0x06");
	TM1637_write_byte_parameter_r24_uses_r25_returns_r24();
	asm("ldi r24, 0x06");
	TM1637_write_byte_parameter_r24_uses_r25_returns_r24();
	asm("ldi r24, 0x06");
	TM1637_write_byte_parameter_r24_uses_r25_returns_r24();
	TM1637_stop();
 
	delay(800);

	// this routine is identic to previous, but does not work well
	// (something about using those registers?), it tries to write "2222":
	
	asm("ldi r16, 0x5b");
	asm("ldi r17, 0x5b");
	asm("ldi r18, 0x5b");
	asm("ldi r19, 0x5b");
	TM1637_write_segments_arguments_r16_r17_r18_r19_uses_r24();

	delay(800);

}

S file:

#define DIO_PIN 0 ; PB0
#define CLK_PIN 1 ; PB1
#define DDRB 0x17
#define PORTB 0x18

.global TM1637_DELAY_US
.global TM1637_start
.global TM1637_stop
.global TM1637_write_segments_arguments_r16_r17_r18_r19_uses_r24
.global TM1637_write_byte_parameter_r24_uses_r25_returns_r24

TM1637_DELAY_US: ; 50 us at 1.2 mhz
 ; rcall takes 3 cycles
 ; ret takes 4 cycles
 ; we need another 53 cycles
    ldi  r18, 25 ; 1 cycle
L1: dec  r18
    breq L1 ; 1 cycle for true and 2 for false
 ret
 
TM1637_start:
	sbi PORTB, DIO_PIN
	sbi PORTB, CLK_PIN
	rcall TM1637_DELAY_US
	cbi PORTB, DIO_PIN
	ret

TM1637_stop:
	cbi PORTB, CLK_PIN
 	rcall TM1637_DELAY_US
	cbi PORTB, DIO_PIN
	rcall TM1637_DELAY_US
	sbi PORTB, CLK_PIN
 	rcall TM1637_DELAY_US
	sbi PORTB, DIO_PIN
	ret

; This function does the same thing as the routine in THE LOOP
; but this one doesn't work well:
TM1637_write_segments_arguments_r16_r17_r18_r19_uses_r24:
	rcall	TM1637_start
	ldi		r24, 0x40
	rcall	TM1637_write_byte_parameter_r24_uses_r25_returns_r24
	rcall	TM1637_stop
	rcall	TM1637_start
	ldi		r24, 0xc0
	rcall	TM1637_write_byte_parameter_r24_uses_r25_returns_r24
	mov		r24, r16
	rcall	TM1637_write_byte_parameter_r24_uses_r25_returns_r24
	mov		r24, r17
	rcall	TM1637_write_byte_parameter_r24_uses_r25_returns_r24
	mov		r24, r18
	rcall	TM1637_write_byte_parameter_r24_uses_r25_returns_r24
	mov		r24, r19
	rcall	TM1637_write_byte_parameter_r24_uses_r25_returns_r24
	rcall	TM1637_stop
	ret
	
TM1637_write_byte_parameter_r24_uses_r25_returns_r24:
	ldi    r25, 0x08			; r25 will serve as counter in for loop (i)
forloop:
	cbi    0x18, 1				; CLK_PIN - LOW
	rcall  TM1637_DELAY_US
	sbrs   r24, 0				; skip if first bit is 1
	rjmp   r24_first_bit_aint_1
	sbi    0x18, 0				; DIO - HIGH
	rjmp   r24_first_bit_is_1
r24_first_bit_aint_1:
	cbi    0x18, 0				; DIO_PIN - LOW
r24_first_bit_is_1:
	sbi    0x18, 1				; CLK_PIN - HIGH
	rcall  TM1637_DELAY_US
	lsr    r24					; r24 shift right by one place
	subi   r25, 0x01			; i--
	brne   forloop				; jump if i isn't null
	
	cbi    0x18, 1				; CLK - LOW
	cbi    0x17, 0				; DIO - INPUT
	sbi    0x18, 0				; DIO – ENABLE PULL-UP
	rcall  TM1637_DELAY_US
	sbic   0x16, 0				; skip if DIO_PIN (set as input) reads false
	rjmp   ack_is_true			; if (ack), jump downthere
	ldi    r24, 0x00			; return ACK (not used, should remove this line)
continue_execution:
	rcall  TM1637_DELAY_US
	sbi    0x18, 1				; CLK_PIN - HIGH
	rcall  TM1637_DELAY_US
	cbi    0x18, 1				; CLK_PIN - LOW
	rcall  TM1637_DELAY_US
	sbi    0x17, 0				; DIO_PIN - OUTPUT
	ret
ack_is_true:
	sbi    0x17, 0				; set DIO_PIN as output
	cbi    0x18, 0				; DIO_PIN - LOW
	ldi    r24, 0x01			; return ACK (not used, should remove this line)
	rjmp   continue_execution

Here is compiled code, the problematic part:

IMO you scratch r18 in TM1637_DELAY_US whereas you are using this register in TM1637_write_segments_arguments_r16_r17_r18_r19_uses_r24. Use r0 instead in TM1637_DELAY_US should fix the issue.

ard_newbie:
IMO you scratch r18 in TM1637_DELAY_US whereas you are using this register in TM1637_write_segments_arguments_r16_r17_r18_r19_uses_r24. Use r0 instead in TM1637_DELAY_US should fix the issue.

Dude you're my saviour!!! I usually code after my family goes to sleep so I'm not very concentrated and these stupid details slip through... Anyways, I'm putting registers used in names of functions, and now my DELAY function uses r20 instead of 18 and is now called "TM1637_DELAY_US_uses_r20"

THANX MAN!!!

A better practice would be to pass arguments inside your functions:
The first one will be received in 24/r25, second in r22/r23, third in r20/r21, and fourth in r18/r19.

ard_newbie:
A better practice would be to pass arguments inside your functions:
The first one will be received in 24/r25, second in r22/r23, third in r20/r21, and fourth in r18/r19.

Sorry but I don't get it?

Here is a very basic example sketch to blink an LED by calling a function with two arguments:

The .ino sketch:

extern "C" {
  void start(uint8_t arg1, uint16_t arg2); // First argument is passed thru r24/r25, second thru r22/r23,...
}

int main(void) {
  start(250, 3000);

while(true);
}

The .S part of the sketch:

; Blink LED on PB5(Arduino Uno pin 13)

#define __SFR_OFFSET 0
#include   "avr/io.h"

.global start

start:
SBI      DDRB, 5
blink:
mov r20, r24 ;***
CALL       delay_n_ms
SBI      PORTB, 5
mov r20, r24 ;***
CALL       delay_n_ms
CBI      PORTB, 5
JMP       blink

delay_n_ms:
; Delay about r20 * 1ms.  Destroys r20, r30, and r31.
; One millisecond is about 16000 cycles at 16MHz.
; The basic loop takes about 5 cycles, so we need about 3000 loops.
mov r31, r23 ;***
mov r30, r22 ;***
delaylp:
sbiw r30, 1
brne delaylp
subi r20, 1
brne delay_n_ms
ret

Okay I get it - to let compiler worry about which register is used, but then I don't save space because compiler inserts bunch of PUSH-es in the beginning of function and POP's at the end of functions. That eats space, and I don't need 2 registers for one argument because I hope to chop down all of my variables to 8 bits, kill all 16 bit integers.
Thank you again for your help!

A better practice would be to pass arguments inside your functions:
The first one will be received in 24/r25, second in r22/r23, third in r20/r21, and fourth in r18/r19.

he's advocating the opposite of what I recommended a while ago. Using a standard argument calling convention is definitely "better practice." Using customer argument calling conventions can save you space. As you're finding out, it also can cause difficult-to-find bugs.

westfw:
he's advocating the opposite of what I recommended a while ago. Using a standard argument calling convention is definitely "better practice." Using customer argument calling conventions can save you space. As you're finding out, it also can cause difficult-to-find bugs.

Yes, I'm a victim of my optimisation for size, I hope in my next project I won't have to do any considerable optimizations :slight_smile: