MADNESS: AtTiny13a VGA driver - 40*40 pixels in 8 colours

Hi All

I have been experimenting with driving VGA using smaller and smaller chips

I started with an ATmega328, then dropped to an ATtiny85, but this week I wanted to see how hard I could squeeze an ATtiny13A.

For those who don't know, the ATtiny13A has only 1 KB of flash ROM and 64 bytes of RAM, so it's tricky to get anything complex in there.

Anyway, I managed to squeeze a 40*40 pixel array in 8 colours out of only 30 bytes of RAM (a huge cheat, obviously), and I was running it at 32 MHz, so that helped!

I created a video here...

ATtiny13A chips are £0.26 (in qty of 10)
32 MHz oscillators are £0.18 (in qty of 10)

You really go "One Step Beyond" Madness indeed :wink:

Well done!

robtillaart:
You really go "One Step Beyond" Madness indeed :wink:

Well done!

Thanks Rob :slight_smile:

A bit more animated now, with a projector and some fog !!

And here is the code to run it....

/*
ATTINY13A pinout :

              RESET 1      8 VCC
        32mhz CLOCK 2      7 GREEN
               BLUE 3      6 RED
                GND 4      5 VHSYNC 

MICRO   PORT  RES  SIGNAL  VGA CONN
Pin 5 - PB0   1k   VHsync  (pin13)
Pin 6 - PB1   47r  RED     (pin1)
Pin 7 - PB2   47r  GREEN   (pin2)
Pin 3 - PB4   47r  BLUE    (pin3)
*/

/* ---- Video status identifiers ---- */
#define VS_VSYNC_LINE 0
#define VS_BLANK_LINE 1
#define VS_ACTIVE_LINE 2

/* ---- scanLine values at which the overflow ISR changes state ---- */
#define END_OF_VSYNC 7      /* OCR0A is set back to the HSYNC width on this line */
#define START_RENDER 42     /* start of the rendered window */
#define END_RENDER 282      /* compare-B (render) interrupt is switched off on this line */
#define END_OF_FRAME 312    /* scanLine wraps back to -1 and the frame counter ticks */

/* ---- Timer0 compare values (timer ticks) ---- */
#define CYCLES_HORZ_SYNC 16      /* OCR0A: end point of HSYNC */
#define CYCLES_VERT_SYNC 255     /* OCR0A maxed out: flat line during VSync */
#define CYCLES_RENDER_START 40   /* OCR0B: horizontal start point of render */

/* ---- Hardware ---- */
#define PORT_VID PORTB      /* port carrying the colour outputs */
#define VSCALE_CONST 5      /* reload value for vscale (scan lines counted per pattern row) */

/*
 * RAM (the part has 64 bytes in total).
 * The six pattern arrays below use 5 bytes each, 30 bytes altogether.
 * H_* hold one bit per horizontal pixel, V_* one bit per vertical row.
 */
volatile uint8_t H_RED_data[5];
volatile uint8_t H_GREEN_data[5];
volatile uint8_t H_BLUE_data[5];
volatile uint8_t V_RED_data[5];
volatile uint8_t V_GREEN_data[5];
volatile uint8_t V_BLUE_data[5];
uint8_t vscale = VSCALE_CONST;   /* counts down between pattern-row changes */
int16_t scanLine = -1;           /* current scan line, advanced by the overflow ISR */
uint8_t V_data_bits;             /* 0b00000BGR vertical bits for the current row */
int8_t renderLine;               /* current pattern row */
volatile uint8_t timer;          /* frame counter, incremented at END_OF_FRAME */

int main(void) {

  DDRB = 0b00010111;                      // set up output ports
  TCCR0A = _BV(WGM00) | _BV(WGM01) | _BV (COM0A1) | _BV(COM0A0);   // fast PWM H&VSync pulses on OC0A (pin5)
  TCCR0B = _BV(CS01) ;                    // fast PWM, top at 0xFF, div8 clock
  OCR0A = CYCLES_HORZ_SYNC;               // 4us(16 clicks) required for Hsync
  OCR0B = CYCLES_RENDER_START;            // ~12.5us(40 clicks) from line start to render start
  TIMSK0 |= _BV(TOIE0);                   // timer 0 compare B interrupt enable
  sei(); 

// set up colour arrays
  H_GREEN_data[0] = 0b00000011;         
  H_BLUE_data[0] = 0b00000011;         
  for (uint8_t x=0; x<5; x++) {           
    V_BLUE_data[x] =   0b11111111;         
    V_GREEN_data[x] = 0b11111111;
    H_RED_data[x] =  0b00011000;
    V_RED_data[x] =  0b00011000;
  }

  while(1) { // this is the main loop
    if (timer == 2) {
      // timed code here....
      shiftFwd(&H_GREEN_data[0]);
      shiftFwd(&H_BLUE_data[0]);
      timer=0;
    }
  } // while
}

/*
 * Rotate a 5-byte (40-bit) pattern one bit to the right.
 *
 * COL points at the first of 5 consecutive bytes; COL[0] bit 7 is the
 * left-most bit and COL[4] bit 0 the right-most. Every bit moves one
 * position towards COL[4] bit 0, and the bit that falls off the end
 * re-enters at COL[0] bit 7.
 *
 * Written in plain C so the compiler knows exactly which memory is
 * read and written (no hand-picked registers or clobber list needed).
 */
void shiftFwd(volatile uint8_t *COL) {
  uint8_t carry = COL[4] & 1;             // bit that wraps round to the front

  for (uint8_t i = 0; i < 5; i++) {
    uint8_t cur = COL[i];
    COL[i] = (uint8_t)((cur >> 1) | (uint8_t)(carry << 7));
    carry = cur & 1;                      // bit handed on to the next byte
  }
}

/*
 * Rotate a 5-byte (40-bit) pattern one bit to the left.
 *
 * COL points at the first of 5 consecutive bytes; COL[0] bit 7 is the
 * left-most bit and COL[4] bit 0 the right-most. Every bit moves one
 * position towards COL[0] bit 7, and the bit that falls off the front
 * re-enters at COL[4] bit 0.
 *
 * Written in plain C so the compiler knows exactly which memory is
 * read and written (no hand-picked registers or clobber list needed).
 */
void shiftRev(volatile uint8_t *COL) {
  uint8_t carry = COL[0] >> 7;            // bit that wraps round to the back

  for (uint8_t i = 5; i-- > 0; ) {
    uint8_t cur = COL[i];
    COL[i] = (uint8_t)((uint8_t)(cur << 1) | carry);
    carry = cur >> 7;                     // bit handed on to the previous byte
  }
}

/*
 * Build V_data_bits (0b00000BGR) for the current renderLine.
 * renderLine selects one bit (MSB first) in each of the V_* arrays;
 * red goes to bit 0, green to bit 1 and blue to bit 2.
 */
void setVbits(void) {
      const uint8_t byteIdx = renderLine >> 3;          // which of the 5 bytes
      const uint8_t shift   = 7 - (renderLine & 7);     // MSB-first bit position inside it

      uint8_t bits = (V_RED_data[byteIdx] >> shift) & 1;
      bits |= ((V_GREEN_data[byteIdx] >> shift) & 1) << 1;
      bits |= ((V_BLUE_data[byteIdx] >> shift) & 1) << 2;

      V_data_bits = bits;
}

/*
 * Timer0 overflow ISR: vertical state machine.
 * Runs on every timer overflow and advances scanLine by one per call.
 * Depending on the current scanLine it arms/disarms the compare-B
 * (render) interrupt, steps the pattern row, and switches OCR0A
 * between the HSYNC and VSYNC pulse widths.
 */
ISR(TIM0_OVF_vect){
  if (scanLine == START_RENDER-1) {
    // line just before the render window: restart the row bookkeeping
    // and enable the compare-B interrupt
    vscale = VSCALE_CONST;
    renderLine = 0;
    setVbits();
    TIMSK0 |= _BV(OCIE0B);
  }
  else if (scanLine >= START_RENDER && scanLine <= END_RENDER-2) {
    // inside the render window: count vscale down, and move to the
    // next pattern row each time it has reached zero
    if (vscale) {
      vscale--;
    }
    else {
      vscale = VSCALE_CONST;
      renderLine++;
      setVbits();
    }
  }
  else if (scanLine == END_RENDER) {
    TIMSK0 &= ~_BV(OCIE0B);               // stop rendering
  }
  else if (scanLine == END_OF_VSYNC) {
    OCR0A = CYCLES_HORZ_SYNC;             // back to normal HSYNC pulses
  }
  else if (scanLine == END_OF_FRAME) {
    // start VSYNC, restart the line count and tick the frame counter
    OCR0A = CYCLES_VERT_SYNC;
    scanLine = -1;
    timer++;                              // do audio sample here
  }

  scanLine++;
}

/*
 * Timer0 compare-B ISR: horizontal render trigger.
 * Draws the current line via render(), but only once scanLine has
 * moved past START_RENDER.
 */
ISR(TIM0_COMPB_vect) {
  if (scanLine <= START_RENDER) {
    return;                               // not past START_RENDER yet
  }
  render();
}

void render(void) { 
  asm volatile(
// line start delay
            "ldi r19,22\n\t"                   // set delay amount
      "S_delay_%=:\n\t"
            "dec r19\n\t"                      // decrement delay
            "brne S_delay_%=\n\t"
// init            
            "in	r16,%[port]\n\t"               // get contents of port and save in r16
            "ldi r17,5\n\t"                    // 5 bytes per line r17=byte count
// get bytes for RG&B
      "byte_loop_%=:\n\t"
            "ld r23,x+\n\t"                    // r18 = red byte
            "ld r24,y+\n\t"                    // rGG = green byte
            "ld r25,z+\n\t"                    // rBB = blue byte
            "ldi r18, 8\n\t "                  // r20 = bit count
// deal with bits, first RED
      "bit_loop_%=:\n\t"
            "bst r23, 7\n\t"	               // grab Tbit from red byte
            "brtc red_not_set_%=\n\t"          // if clear then clear !
            "bst %[vData], 0\n\t"              // grab red bit (0) from V_data
            "bld r16, 1\n\t"                   // store T (v_data bit) into r16 red bit
            "rjmp green_%=\n\t"
// NO RED
      "red_not_set_%=:\n\t"  
            "andi r16, 0b11111101\n\t"         // unset red
            "nop\n\t nop\n\t nop\n\t"
// deal with GREEN
      "green_%=:\n\t"
            "bst r24, 7\n\t"	               // grab Tbit from green byte
            "brtc green_not_set_%=\n\t"        // if clear then clear !
            "bst %[vData], 1\n\t"              // grab green bit (1) from V_data
            "bld r16, 2\n\t"                   // store T (v_data bit) into r16 green bit
            "rjmp blue_%=\n\t"
// NO GREEN
      "green_not_set_%=:\n\t"  
            "andi r16, 0b11111011\n\t"         // unset green
            "nop\n\t nop\n\t nop\n\t"
// deal with BLUE
      "blue_%=:\n\t"
            "bst r25, 7\n\t"	               // grab Tbit from green byte
            "brtc blue_not_set_%=\n\t"         // if clear then clear !
            "bst %[vData], 2\n\t"              // grab blue bit (2) from V_data
            "bld r16, 4\n\t"                   // store T (v_data bit) into r16 green bit
            "rjmp next_bit_%=\n\t"
// NO BLUE
      "blue_not_set_%=:\n\t"  
            "andi r16, 0b11101111\n\t"         // unset blue
            "nop\n\t nop\n\t nop\n\t"
// output RG&B to port
      "next_bit_%=:\n\t"
            "out %[port], r16\n\t"             // send result to port
// delay before next bit
            "ldi r19,2\n\t"                    // set delay amount
      "B_delay_%=:\n\t"
            "dec r19\n\t"                      // decrement delay
            "brne B_delay_%=\n\t"
// roll all to get next bits            
            "rol r23\n\t"                      // roll red byte to get next bit
            "rol r24\n\t"                      // roll green byte to get next bit
            "rol r25\n\t"                      // roll blue byte to get next bit
            "dec r18\n\t"                      // dec bit count
            "brne bit_loop_%=\n\t"             // go get next bit
// go get next byte
            "dec r17\n\t"                      // dec byte count
            "brne byte_loop_%=\n\t"            // go get next byte
// delay before clearing outputs at line end !
            "nop\n\t nop\n\t nop\n\t nop\n\t"
            "nop\n\t nop\n\t nop\n\t nop\n\t"
            "andi r16, 0b11101001\n\t"         // clear output for end of line
            "out %[port], r16\n\t"             // output clear
            ::
   	    [port] "i" (_SFR_IO_ADDR(PORT_VID)),  // output port
            [vData] "r" (V_data_bits),            // vertical data bits 0b00000BGR for vertical line enable
  	    "x" (&H_RED_data[0]),                 // x (r27:r26) 
  	    "y" (&H_GREEN_data[0]),               // y (r29:r28) 
  	    "z" (&H_BLUE_data[0])                 // z (r31:r30) 
  	    :  
  	    "r16", "r17", "r18", "r19", "r23", "r24", "r25"// clobbers 
          ); // end of asm
}

I forgot to mention this is PAL only at the moment; I can't easily test with NTSC as I am in the UK.

Holy mother of overclock. It's stable at 32MHz?

I forgot to mention this is PAL only at the moment, I cant easily test with NTSC as I am from the UK

I don't understand. I thought you said it was VGA?
VGA is VGA. There is not NTSC or PAL except with analog TV signals

Yes, it is perfectly stable at 32 MHz: no heat, no odd occurrences, it just runs and runs!

I am also a little confused over PAL/NTSC. As far as I know, my (UK) video devices run at a 50 Hz/25 Hz frame rate because that matches the frequency of the 240 V AC mains, and NTSC devices run at 60 Hz (etc.) for the same reason.

As far as VGA is concerned, I am feeding my signal into the 'computer in' 15-pin D-type connector on my projector, which I (perhaps wrongly) assumed is a VGA port... I have yet to get anything working on a standard PC monitor, as my monitor reports that my timing is outside the acceptable range, even though it runs fine on the projector.

If anyone can help to resolve my VGA confusion it would be great !!! thanks

Very nice!

Have you considered bitbanging the video signal without interrupts? That way you can avoid interrupt overhead and keep stuff in registers which saves a lot of cycles. You would need cycle counted assembly to generate a properly synced video signal. Low amount of progmem could be an issue on ATtiny13 though.

I used this technique in my 6502 homebrew computer project which can generate a 400x256 PAL signal (among other things) using ATmega1284P running at 16 MHz.

Not all monitors accept the same tolerances. Try another one.
NTSC and PAL only applies to TV standards. Not monitors

PetriH:
Very nice!

Have you considered bitbanging the video signal without interrupts? That way you can avoid interrupt overhead and keep stuff in registers which saves a lot of cycles. You would need cycle counted assembly to generate a properly synced video signal. Low amount of progmem could be an issue on ATtiny13 though.

I used this technique in my 6502 homebrew computer project which can generate a 400x256 PAL signal (among other things) using ATmega1284P running at 16 MHz.

Thanks Petri, I would like to keep the interrupt in there so that I can just change the contents of the bit map arrays etc outside it, I have much bigger programs running similar (ATTiny85, atmega328) that do alot of maths outside the interrupt, and even the 328 is receiving data at 250kbps

smeezekitty:
Not all monitors accept the same tolerances. Try another one.
NTSC and PAL only applies to TV standards. Not monitors

Cheers :slight_smile: I think its all starting to sink in now...

Hi Mcnobby,
your ATTINY project is great!

I've seen the video and your sourcecode.
The problem may be a little jitter in the execution of your interrupt function (a delay of one clock or more, depending on which instruction is executing when the interrupt happens). I had a similar problem, but in my case the signal was more misaligned (1 pixel or more).
I saw this video by Charles (CNLOHR) before I understood the problem.


Here is my VGA library for the Arduino UNO. The sketch runs at 16 MHz without an additional oscillator.
Here is my post on this forum.

Yes Smaffer, this is EXACTLY what I need. I kind of understand how it all works; it's just that implementing it in the timer structure I already have running may or may not be easy.

I shall do more work on this when I get back to studying the code again

Incidentally, I made a post on here with 24bit colour from a AtMega328, but unfortunately only one colour on screen at once

I have got some more videos on my youtube of how I created dithered colours using a really cool way, its not brilliantly usable for text but may work well for pictures

Great work!

If you want to try some code, i've used this asm block

//interrupt jitter fix (needed to keep signal stable)
//code from https://github.com/cnlohr/avrcraft/tree/master/terminal
//modified from 4 nop align to 8 nop align
//
// Idea: read the timer count, reduce it to a value 0..7, and jump that far
// into a run of nops, so the number of nops executed depends on when the
// ISR actually started.
// NOTE(review): [toffset] and the "r17" clobber are not used by the active
// instructions (the only line using toffset is commented out).
// NOTE(review): `call` is not available on every AVR device; on small parts
// such as the ATtiny13A `rcall` is presumably required - confirm against the
// instruction set manual before reusing this there.
#define DEJITTER_OFFSET 1
#define DEJITTER_SYNC -3
asm volatile(
  "     lds r16, %[timer0]    \n\t" // r16 = current TCNT0
  //"   add r16, %[toffset]   \n\t" //
  "     subi r16, %[tsync]    \n\t" // subtract (uint8_t)DEJITTER_SYNC, i.e. 253 (same as adding 3 modulo 256)
  "     andi r16, 7           \n\t" // keep the low 3 bits: 0..7
  "     call TL               \n\t" // pushes the address of TL as return address
  "TL:                        \n\t" //
  "     pop r31               \n\t" // pop that return address ...
  "     pop r30               \n\t" // ... into Z (r31:r30)
  "     adiw r30, (LW-TL-5)   \n\t" // move Z towards the nop run (tuned constant - TODO confirm)
  "     add r30, r16          \n\t" // add the 0..7 phase value (no carry into r31, see commented-out adc)
  //"   adc r31, __zero_reg__ \n\t" //
  "     ijmp                  \n\t" // indirect jump to the address in Z
  "LW:                        \n\t" // start of the nop run
  "     nop                   \n\t" //
  "     nop                   \n\t" //
  "     nop                   \n\t" //
  "     nop                   \n\t" //
  "     nop                   \n\t" //
  "     nop                   \n\t" //
  "     nop                   \n\t" //
  //"   nop                   \n\t" //
  "LBEND:                     \n\t" // end of the nop run
:
: [timer0] "i" (&TCNT0),
  [toffset] "i" ((uint8_t)DEJITTER_OFFSET),
  [tsync] "i" ((uint8_t)DEJITTER_SYNC)
: "r30", "r31", "r16", "r17");

Remember to setup the TIMER0 without any prescaler so the timer will count at 16Mhz (32Mhz in your case)

Nice one, I must save that code and add it to what I have already for a little test
This would probably get my projects flowing again, I stopped when I encountered the jitter !!

Do you need to clobber r17 ?

r17 is not used. You can remove it from clobber list

smaffer:
Remember to setup the TIMER0 without any prescaler so the timer will count at 16Mhz (32Mhz in your case)

I am already using timer0 to create the h&sync timing

  TCCR0A = _BV(WGM00) | _BV(WGM01) | _BV (COM0A1) | _BV(COM0A0);   // fast PWM H&VSync pulses on OC0A (pin5)
  TCCR0B = _BV(CS01) ;                    // fast PWM, top at 0xFF, div8 clock
  OCR0A = CYCLES_HORZ_SYNC;               // 4us(16 clicks) required for Hsync
  OCR0B = CYCLES_RENDER_START;            // ~12.5us(40 clicks) from line start to render start
  TIMSK0 |= _BV(TOIE0);                   // timer 0 overflow interrupt enable (TOIE0 is the overflow bit, not compare B)

I see I must have used a div8 prescaler on the 32 MHz clock. Does this affect your code snippet, given that you mentioned using NO prescaler?

Also, just looking at the ATTiny13A datasheet, it only has ONE 8 bit timer (timer zero) :frowning:

ATTiny13A datasheet