PSTR() flash string de-duplication -- a problem, a solution, and a question

Here's a possible workaround: Replace the "0:" label and the "0b" backreferences with "999:" and "999b", respectively. (Any number greater than 9 should work just as well.)

When I did that, the disassembly suddenly started showing two distinct pairs of ldi statements (with different offsets) where the original (using the "0:" label) hadn't. The idea of giving this change a try was inspired by the following statement from the relevant GNU Assembler docs:

"It is also worth noting that the first 10 local labels (0:...9:) are implemented in a slightly more efficient manner than the others."

It seems that "slightly more efficient" also implies "slightly buggy" in our particular case here.

Here's the full repro case:

// Intentionally empty: this repro performs no one-time initialization.
void setup()
{
}

// Selector that loop() inverts on every call, so both branches of the
// conditional expression in loop() are reachable.
bool flag = false;
// Destination for the pointer chosen in loop(). Presumably declared volatile
// so the store cannot be optimized away -- confirm intent with the author.
// PGM_P is not defined in this snippet (assumed to be avr-libc's
// program-memory string pointer type).
volatile PGM_P result;

// Repro body: toggles `flag`, then stores into `result` the address of one of
// two string literals that are emitted into the .progmem.data section via
// inline assembly. Each branch of the conditional is a GNU statement
// expression whose value is the local `ptr`.
//
// NOTE: as written, a true `flag` selects the literal "false" and a false
// `flag` selects the literal "true".
void loop()
{
  flag = !flag;

  result =
    flag
      // Branch taken when flag is true: emits and references the "false" string.
      ? (__extension__({
          PGM_P ptr;
          // Switch to .progmem.data (flags "SM": mergeable, NUL-terminated
          // strings; entry size 1), define numeric local label 999 at the
          // string, then restore the previous section.
          asm volatile
          (
            ".pushsection .progmem.data, \"SM\", @progbits, 1" "\n\t"
            "999: .string " "\"false\""                        "\n\t"
            ".popsection"                                      "\n\t"
          );
          // Load the low/high bytes of the address of the most recently
          // defined label 999 ("999b" = backward reference) into the register
          // pair holding ptr (%A0 = low byte, %B0 = high byte). The "d"
          // constraint requests an upper register, as required by ldi.
          // NOTE(review): this relies on no other "999:" definition being
          // emitted between the previous asm statement and this one.
          asm volatile
          (
            "ldi %A0, lo8(999b)"                               "\n\t"
            "ldi %B0, hi8(999b)"                               "\n\t"
            : "=d" (ptr)
          );
          // Value of the statement expression.
          ptr;
        }))
      // Branch taken when flag is false: same pattern, but with the "true"
      // string. The ldi asm text is identical to the one in the other branch;
      // only the preceding label definition differs.
      : (__extension__({
          PGM_P ptr;
          asm volatile
          (
            ".pushsection .progmem.data, \"SM\", @progbits, 1" "\n\t"
            "999: .string " "\"true\""                         "\n\t"
            ".popsection"                                      "\n\t"
          );
          asm volatile
          (
            "ldi %A0, lo8(999b)"                               "\n\t"
            "ldi %B0, hi8(999b)"                               "\n\t"
            : "=d" (ptr)
          );
          ptr;
        }));
}

Here's the corresponding disassembly of the loop() function -- note the two distinct ldi pairs at code offsets ae and b4, which load the string addresses 0x68 (104) and 0x6E (110), respectively:

0000009e <loop>:
  9e:	80 91 02 01 	lds	r24, 0x0102
  a2:	91 e0       	ldi	r25, 0x01	; 1
  a4:	89 27       	eor	r24, r25
  a6:	80 93 02 01 	sts	0x0102, r24
  aa:	88 23       	and	r24, r24
  ac:	19 f0       	breq	.+6      	; 0xb4 <loop+0x16>
  ae:	88 e6       	ldi	r24, 0x68	; 104
  b0:	90 e0       	ldi	r25, 0x00	; 0
  b2:	02 c0       	rjmp	.+4      	; 0xb8 <loop+0x1a>
  b4:	8e e6       	ldi	r24, 0x6E	; 110
  b6:	90 e0       	ldi	r25, 0x00	; 0
  b8:	90 93 01 01 	sts	0x0101, r25
  bc:	80 93 00 01 	sts	0x0100, r24
  c0:	08 95       	ret

For comparison, here's the disassembly when "0:" is used as the local label instead of "999:" -- note that there's only a single ldi pair, at offset b0, as you had already shown:

0000009e <loop>:
  9e:	80 91 02 01 	lds	r24, 0x0102
  a2:	91 e0       	ldi	r25, 0x01	; 1
  a4:	89 27       	eor	r24, r25
  a6:	80 93 02 01 	sts	0x0102, r24
  aa:	88 23       	and	r24, r24
  ac:	09 f0       	breq	.+2      	; 0xb0 <loop+0x12>
  ae:	00 c0       	rjmp	.+0      	; 0xb0 <loop+0x12>
  b0:	8e e6       	ldi	r24, 0x6E	; 110
  b2:	90 e0       	ldi	r25, 0x00	; 0
  b4:	90 93 01 01 	sts	0x0101, r25
  b8:	80 93 00 01 	sts	0x0100, r24
  bc:	08 95       	ret