Conway's Game Of Life with TVout 128 x 96 pixels

Having just discovered the excellent TVout library, I thought I would combine it with one of my old favourites, Conway’s Game Of Life.

This is not a new idea, there’s already a video out there of this being done, but my version is much faster and uses a matrix with far more pixels, making it much more interesting to watch.

Video

Pics attached below, here’s the code:

// Conway's Game Of Life 128x96 using TVout
// P.Beard
// March 2013

#include <TVout.h>

// Width of the matrix in unsigned long words per row; setup() asks TVout
// for matWidth * 32 pixels across.
#define matWidth 4
// Height of the matrix in rows (one row per TV line of the frame buffer).
#define matHeight 96

// TVout driver instance.
TVout TV;
// TVout's frame buffer viewed as unsigned long words; assigned in setup().
unsigned long * myScreen;

void setup() {
  TV.begin(PAL, matWidth * 32, matHeight);
  myScreen = (unsigned long *) TV.screen;
  randomSeed(analogRead(0));
  randomiseMatrix();
}

void loop() {
  generateMatrix();
  digitalWrite(13, !digitalRead(13));
}

// Reverse the order of the four low bytes of x (byte 0 <-> byte 3,
// byte 1 <-> byte 2) and return the result.
unsigned long swapBytes(unsigned long x) {
  unsigned long lowest  = x & 0x000000ffUL;
  unsigned long lower   = x & 0x0000ff00UL;
  unsigned long higher  = x & 0x00ff0000UL;
  unsigned long highest = x & 0xff000000UL;

  return (lowest << 24) | (lower << 8) | (higher >> 8) | (highest >> 24);
}

void randomiseMatrix() {

  //Set up initial cells in matrix
  for (int r = 0; r < matHeight; r++) {
    for (int c = 0; c < matWidth; c++) {
      myScreen[r * matWidth + c] = random(0xffff) << 16 | random(0xffff);
    }
  }
}

void injectGlider() {

  byte col = random(matWidth);
  byte row = random(matHeight);
  myScreen[(row+0) * matWidth + col] |= B0000111;
  myScreen[(row+1) * matWidth + col] |= B0000001;
  myScreen[(row+2) * matWidth + col] |= B0000010;

}
	
void generateMatrix() {
  
  //Variables holding data on neighbouring cells
  unsigned long NeighbourN[matWidth], NeighbourNW[matWidth], NeighbourNE[matWidth], CurrCells[matWidth], NeighbourW[matWidth];
  unsigned long NeighbourE[matWidth], NeighbourS[matWidth], NeighbourSW[matWidth], NeighbourSE[matWidth], firstRow[matWidth];
	
  unsigned long tot1, tot2, tot4, carry, NewCells;

  int changes = 0; // counts the changes in the matrix
  static int prevChanges = 256; // counts the changes in the matrix on prev generation
  static int staleCount = 0; // counts the consecutive occurrances of the same number of changes in the matrix

  //set up N, NW, NE, W & E neighbour data
  //also take a copy of the first row data for use later when calculating last row
  for (byte b = 0; b < matWidth; b++) {
    NeighbourN[b] = swapBytes(myScreen[(matHeight-1) * matWidth + b]);
    firstRow[b] = CurrCells[b] = swapBytes(myScreen[b]);
  }

  carry = NeighbourN[matWidth-1];
  for (char b = 0; b < matWidth; b++) {
    NewCells = NeighbourN[b];
    NeighbourNW[b] = NewCells >> 1 | carry << 31; 
    carry = NewCells;
  }
  
  carry = NeighbourN[0];    
  for (char b = matWidth-1; b >= 0; b--) {
    NewCells = NeighbourN[b];
    NeighbourNE[b] = NewCells << 1 | carry >> 31;
    carry = NewCells;
  }
	
  carry = CurrCells[matWidth-1];
  for (char b = 0; b < matWidth; b++) {
    NewCells = CurrCells[b];
    NeighbourW[b] = NewCells >> 1 | carry << 31;
    carry = NewCells;
  }
  
  carry = CurrCells[0];    
  for (char b = matWidth-1; b >= 0; b--) {
    NewCells = CurrCells[b];
    NeighbourE[b] = NewCells << 1 | carry >> 31;
    carry = NewCells;
  }
  
  //Process each row of the matrix
  for (byte row = 0; row < matHeight; row++) {
		
    //Pick up new S, SW & SE neighbours
    if (row < matHeight - 1) {
      for (byte b = 0; b < matWidth; b++) {
        NeighbourS[b] = swapBytes(myScreen[(row+1) * matWidth + b]);
      }
    }
    else {
      for (byte b = 0; b < matWidth; b++) {
        NeighbourS[b] = firstRow[b];
      }
    }
  
    carry = NeighbourS[matWidth-1];
    for (char b = 0; b < matWidth; b++) {
      NewCells = NeighbourS[b];
      NeighbourSW[b] = NewCells >> 1 | carry << 31;
      carry = NewCells;
    }
      
    carry = NeighbourS[0];    
    for (char b = matWidth-1; b >= 0; b--) {
      NewCells = NeighbourS[b];
      NeighbourSE[b] = NewCells << 1 | carry >> 31;
      carry = NewCells;
    }
  
    for (char b = 0; b < matWidth; b++) {
      
       //Count the live neighbours (in parallel) for the current row of cells
      //However, if total goes over 3, we don't care (see below), so counting stops at 4
      tot1 = NeighbourN[b];
      tot2 = tot1 & NeighbourNW[b]; tot1 = tot1 ^ NeighbourNW[b];
      carry = tot1 & NeighbourNE[b]; tot1 = tot1 ^ NeighbourNE[b]; tot4 = tot2 & carry; tot2 = tot2 ^ carry;
      carry = tot1 & NeighbourW[b]; tot1 = tot1 ^ NeighbourW[b]; tot4 = tot2 & carry | tot4; tot2 = tot2 ^ carry;
      carry = tot1 & NeighbourE[b]; tot1 = tot1 ^ NeighbourE[b]; tot4 = tot2 & carry | tot4; tot2 = tot2 ^ carry;
      carry = tot1 & NeighbourS[b]; tot1 = tot1 ^ NeighbourS[b]; tot4 = tot2 & carry | tot4; tot2 = tot2 ^ carry;
      carry = tot1 & NeighbourSW[b]; tot1 = tot1 ^ NeighbourSW[b]; tot4 = tot2 & carry | tot4; tot2 = tot2 ^ carry;
      carry = tot1 & NeighbourSE[b]; tot1 = tot1 ^ NeighbourSE[b]; tot4 = tot2 & carry | tot4; tot2 = tot2 ^ carry;
		
      //Calculate the updated cells:
      // <2 or >3 neighbours, cell dies
      // =2 neighbours, cell continues to live
      // =3 neighbours, new cell born
      NewCells = (CurrCells[b] | tot1) & tot2 & ~ tot4;
      
      //Have any cells changed?
      if (NewCells != CurrCells[b]) {
        myScreen[row * matWidth + b] = swapBytes(NewCells);
        //Count the change for "stale" test
        changes++;
      }
      
      //Current cells (before update), E , W, SE, SW and S neighbours become
      //new N, NW, NE, E, W neighbours and current cells for next loop
      NeighbourN[b] = CurrCells[b];
      NeighbourNW[b] = NeighbourW[b];
      NeighbourNE[b] = NeighbourE[b];
      NeighbourE[b] = NeighbourSE[b];
      NeighbourW[b] = NeighbourSW[b];
      CurrCells[b] = NeighbourS[b];
    } //next col
  } //next row
    
  if (changes != prevChanges) staleCount = 0; else staleCount++; //Detect "stale" matrix
  if (staleCount > 32) injectGlider(); //Inject a glider

  prevChanges = changes;
}

This works REALLY well!

Looks great on my 37" Sony BRAVIA!!!

Thanks!

I made a small change, starts off with R Pentomino rather than a random pond.
R Pentomino spawns 5 gliders in the first 30 odd generations, becomes “stable” after 1800 generations.

My first Apple II Life program took nearly a day to do 1800 generations!

// Conway's Game Of Life 128x96 using TVout
// P.Beard
// March 2013

#include <TVout.h>

// Number of unsigned long words in each row of the matrix; the video width
// requested in setup() is matWidth * 32 pixels.
#define matWidth 4
// Number of rows in the matrix.
#define matHeight 96

// TVout driver instance.
TVout TV;
// Word-wide view of TVout's frame buffer; set in setup().
unsigned long * myScreen;

void setup() {
  TV.begin(PAL, matWidth * 32, matHeight);
  myScreen = (unsigned long *) TV.screen;
  randomSeed(analogRead(0));
  randomiseMatrix();
}

void loop() {
  generateMatrix();
  digitalWrite(13, !digitalRead(13));
}

// Return x with its four low bytes in reverse order.
unsigned long swapBytes(unsigned long x) {
  unsigned long swapped = (x & 0x000000ffUL) << 24;
  swapped |= (x & 0x0000ff00UL) << 8;
  swapped |= (x & 0x00ff0000UL) >> 8;
  swapped |= (x & 0xff000000UL) >> 24;
  return swapped;
}

void randomiseMatrix() {
//start with R pentomino
  byte col = random(matWidth);
  byte row = random(matHeight);
  myScreen[(row+0) * matWidth + col] |= B0000011;
  myScreen[(row+1) * matWidth + col] |= B0000110;
  myScreen[(row+2) * matWidth + col] |= B0000010;
  
//  //Set up initial cells in matrix
//  for (int r = 0; r < matHeight; r++) {
//    for (int c = 0; c < matWidth; c++) {
//      myScreen[r * matWidth + c] = random(0xffff) << 16 | random(0xffff);
//    }
//  }
}

void injectGlider() {

  byte col = random(matWidth);
  byte row = random(matHeight);
  myScreen[(row+0) * matWidth + col] |= B0000111;
  myScreen[(row+1) * matWidth + col] |= B0000001;
  myScreen[(row+2) * matWidth + col] |= B0000010;
}

	
void generateMatrix() {
  
  //Variables holding data on neighbouring cells
  unsigned long NeighbourN[matWidth], NeighbourNW[matWidth], NeighbourNE[matWidth], CurrCells[matWidth], NeighbourW[matWidth];
  unsigned long NeighbourE[matWidth], NeighbourS[matWidth], NeighbourSW[matWidth], NeighbourSE[matWidth], firstRow[matWidth];
	
  unsigned long tot1, tot2, tot4, carry, NewCells;

  int changes = 0; // counts the changes in the matrix
  static int prevChanges = 256; // counts the changes in the matrix on prev generation
  static int staleCount = 0; // counts the consecutive occurrances of the same number of changes in the matrix

  //set up N, NW, NE, W & E neighbour data
  //also take a copy of the first row data for use later when calculating last row
  for (byte b = 0; b < matWidth; b++) {
    NeighbourN[b] = swapBytes(myScreen[(matHeight-1) * matWidth + b]);
    firstRow[b] = CurrCells[b] = swapBytes(myScreen[b]);
  }

  carry = NeighbourN[matWidth-1];
  for (char b = 0; b < matWidth; b++) {
    NewCells = NeighbourN[b];
    NeighbourNW[b] = NewCells >> 1 | carry << 31; 
    carry = NewCells;
  }
  
  carry = NeighbourN[0];    
  for (char b = matWidth-1; b >= 0; b--) {
    NewCells = NeighbourN[b];
    NeighbourNE[b] = NewCells << 1 | carry >> 31;
    carry = NewCells;
  }
	
  carry = CurrCells[matWidth-1];
  for (char b = 0; b < matWidth; b++) {
    NewCells = CurrCells[b];
    NeighbourW[b] = NewCells >> 1 | carry << 31;
    carry = NewCells;
  }
  
  carry = CurrCells[0];    
  for (char b = matWidth-1; b >= 0; b--) {
    NewCells = CurrCells[b];
    NeighbourE[b] = NewCells << 1 | carry >> 31;
    carry = NewCells;
  }
  
  //Process each row of the matrix
  for (byte row = 0; row < matHeight; row++) {
		
    //Pick up new S, SW & SE neighbours
    if (row < matHeight - 1) {
      for (byte b = 0; b < matWidth; b++) {
        NeighbourS[b] = swapBytes(myScreen[(row+1) * matWidth + b]);
      }
    }
    else {
      for (byte b = 0; b < matWidth; b++) {
        NeighbourS[b] = firstRow[b];
      }
    }
  
    carry = NeighbourS[matWidth-1];
    for (char b = 0; b < matWidth; b++) {
      NewCells = NeighbourS[b];
      NeighbourSW[b] = NewCells >> 1 | carry << 31;
      carry = NewCells;
    }
      
    carry = NeighbourS[0];    
    for (char b = matWidth-1; b >= 0; b--) {
      NewCells = NeighbourS[b];
      NeighbourSE[b] = NewCells << 1 | carry >> 31;
      carry = NewCells;
    }
  
    for (char b = 0; b < matWidth; b++) {
      
       //Count the live neighbours (in parallel) for the current row of cells
      //However, if total goes over 3, we don't care (see below), so counting stops at 4
      tot1 = NeighbourN[b];
      tot2 = tot1 & NeighbourNW[b]; tot1 = tot1 ^ NeighbourNW[b];
      carry = tot1 & NeighbourNE[b]; tot1 = tot1 ^ NeighbourNE[b]; tot4 = tot2 & carry; tot2 = tot2 ^ carry;
      carry = tot1 & NeighbourW[b]; tot1 = tot1 ^ NeighbourW[b]; tot4 = tot2 & carry | tot4; tot2 = tot2 ^ carry;
      carry = tot1 & NeighbourE[b]; tot1 = tot1 ^ NeighbourE[b]; tot4 = tot2 & carry | tot4; tot2 = tot2 ^ carry;
      carry = tot1 & NeighbourS[b]; tot1 = tot1 ^ NeighbourS[b]; tot4 = tot2 & carry | tot4; tot2 = tot2 ^ carry;
      carry = tot1 & NeighbourSW[b]; tot1 = tot1 ^ NeighbourSW[b]; tot4 = tot2 & carry | tot4; tot2 = tot2 ^ carry;
      carry = tot1 & NeighbourSE[b]; tot1 = tot1 ^ NeighbourSE[b]; tot4 = tot2 & carry | tot4; tot2 = tot2 ^ carry;
		
      //Calculate the updated cells:
      // <2 or >3 neighbours, cell dies
      // =2 neighbours, cell continues to live
      // =3 neighbours, new cell born
      NewCells = (CurrCells[b] | tot1) & tot2 & ~ tot4;
      
      //Have any cells changed?
      if (NewCells != CurrCells[b]) {
        myScreen[row * matWidth + b] = swapBytes(NewCells);
        //Count the change for "stale" test
        changes++;
      }
      
      //Current cells (before update), E , W, SE, SW and S neighbours become
      //new N, NW, NE, E, W neighbours and current cells for next loop
      NeighbourN[b] = CurrCells[b];
      NeighbourNW[b] = NeighbourW[b];
      NeighbourNE[b] = NeighbourE[b];
      NeighbourE[b] = NeighbourSE[b];
      NeighbourW[b] = NeighbourSW[b];
      CurrCells[b] = NeighbourS[b];
    } //next col
  } //next row
    
  if (changes != prevChanges) staleCount = 0; else staleCount++; //Detect "stale" matrix
  if (staleCount > 32) injectGlider(); //Inject a glider

  prevChanges = changes;
}

cyberteque: My first Apple II Life program took nearly a day to do 1800 generations!

So, how long does the Arduino take to do 1800 generations?

The Arduino has a 16MHz clock compared to the 2MHz on the Apple II's 6502, and can perform most instructions in a single clock cycle, whereas most instructions took at least 2 cycles on the 6502 I seem to remember. So Arduino should outperform it by at least 4 times. But then again, the Arduino is spending a significant (>50%?) part of its time running the TVout code to generate the video signal, whereas the Apple had a dedicated video chip separate to the CPU.

When you say "becomes stable after 1800 generations", doesn't that depend on the grid size?

"becomes stable after 1800 generations without wrap around" I should have said! On the Arduino R-pentomino takes around 3-4 minutes to become stable.

My Apple II code started out in Applesoft floating basic, going to integer Apple Basic sped things up, but took too long, so I wrote a hand assembled machine code version that ran pretty damn fast

Hi cyberteque,

Thought you might be interested to see this. Starts with R-Pentomino like you suggested. The Micro Pro drives a 128x64 SPI OLED at around 45 generations per sec.

http://youtu.be/xTl5rC6BvgY

How would one write to something like a tft using the utft library? I was planning to use myGLCD.drawPixel(x, y); to draw pixels that are alive, but looks like you're pointing to a variable in the tv library?

I think the answer is “very slowly”. My versions of life run quickly on Arduino because they process multiple cells in parallel and draw them to the screen in parallel. Doing what you suggest would take many times longer, unfortunately. So far I have used this technique on 128x64 lcd and oled screens with 8 bit parallel and spi interfaces as well as tv out. In each case, 1 cell = 1 bit = 1 pixel, so minimal time is spent translating the grid into pixels. I have not attempted life on colour screens yet (where multiple bits = 1 pixel).

Never mind, got it. Running on a 240 x 400 lcd display. The pixels are too small to have each one represent life, so there's a 3x magnification in the code.

This is for my fractal generating desk clock... I thought game of life would be a nice addition

https://www.youtube.com/watch?v=Dx4rm3gf0iw

Not bad! Faster than I thought it would be.

Your left/right wrap-around doesn't seem to be working yet. Can't tell about top/bottom on that vid.

Care to share your sketch?

nothing special I cheated
https://www.pjrc.com/store/teensy31.html

Ah! I've been thinking about getting one of those for a while. 5 times the clock speed, lots more ram and 32 bit instructions would enable a much bigger matrix with more chance to see some of the rarer "species" appearing.

Have you figured out how to use a dma channel to update the display via spi?

Teensy3.1 now overclocks up to 168MHz. Game of life with teensy at 144Mhz... Testing some graphing algorithms for a barometer/temp/humidity. Currently graphing life count

https://www.youtube.com/watch?v=4mj3rdB8HNs

The DUE overclocks to 114MHz. 512KB flash and 96KB RAM.

Costs just about nothing.

I have 10 of them in my "good to have" box.

This is great, thanks for sharing.

For some reason I was getting the following error message when trying to compile for my Uno-

game_of_life_tv_out.ino: In function ‘void setup()’:
game_of_life_tv_out.ino:15:12: error: ‘PAL’ was not declared in this scope
Error compiling.

I was able to make it work by replacing

TV.begin(PAL, matWidth * 32, matHeight);

with

TV.start_render(_NTSC);

marklfarkl: I was able to make it work by replacing

TV.begin(PAL, matWidth * 32, matHeight);

with

TV.start_render(_NTSC);

It seems that you have an old version of the library.

this "program" is now called "fred"

I was showing it to my dippy mate, he kept asking "What's the program called?"

I gave up trying to tell him it was just Conway's Life running on an Arduino

In the end I got fed up and in frustration said "It's called Fred! Happy now?"

How are you getting a resolution of 128 x 96 to work? I thought TVout supported a max resolution of 120x96.

I have created a cool clock that works well. Game of life works well too. However I tried to implement a toggle in my code that switches the display between Game of Life and my clock with a button press. As soon as Game of Life starts, the display cuts out. Anyone know why? Is it because I have my resolution set to 120x96?

I trimmed out all the clock stuff to make this easier to read. What in the Game of Life code needs to be changed for this to work at 120x96? I tried changing TV.begin(NTSC, matWidth * 32, matHeight) to TV.begin(NTSC, matWidth * 30, matHeight) but that didn’t work.

#include <font4x6.h>
#include <font8x8.h>
#include <font16x32.h>
#include <retro8x16.h>
#include <Wire.h>
#include <TVout.h>

// Number of unsigned long words the Life code uses per matrix row.
#define matWidth 4
// Number of rows in the matrix.
#define matHeight 96

// Pin of the mode button; loop() treats a LOW reading as a press.
int modeButton = 8;

// TVout driver instance.
TVout TV;
// Word-wide view of TVout's frame buffer; set in setup().
unsigned long * myScreen;
// Display mode flag: loop() runs generateMatrix() when true and
// displayClock() when false; toggled by changeGame().
boolean startGame = false;

void setup() {

  pinMode(modeButton, INPUT);

  //TV.begin(NTSC, matWidth * 32, matHeight);
  TV.begin(NTSC, 120, 96);
  myScreen = (unsigned long *) TV.screen;

  randomSeed(analogRead(0));

  Wire.begin();

} //end setup

void loop () {

  if (digitalRead(modeButton) == LOW) {
    TV.delay_frame(20);
    changeGame();
  }

  switch (startGame) {
    case 0:
      displayClock();
      break;

    case 1:
      generateMatrix();
      break;
  }
} //end loop

void changeGame() {

  TV.clear_screen();
  randomiseMatrix;
  startGame = !startGame;
}

//clock
// Draws the clock display; called from loop() while startGame is false.
// The body is a placeholder: the poster removed the clock code for brevity.
void displayClock() {

  //clock code goes here

}

// Reverse the byte order of the low 32 bits of x.
unsigned long swapBytes(unsigned long x) {
  unsigned long outer = ((x & 0x000000ffUL) << 24) | ((x & 0xff000000UL) >> 24);
  unsigned long inner = ((x & 0x0000ff00UL) << 8) | ((x & 0x00ff0000UL) >> 8);
  return outer | inner;
}

void randomiseMatrix() {
  //start with R pentomino
  byte col = random(matWidth);
  byte row = random(matHeight);
  myScreen[(row + 0) * matWidth + col] |= B0000011;
  myScreen[(row + 1) * matWidth + col] |= B0000110;
  myScreen[(row + 2) * matWidth + col] |= B0000010;
}

void injectGlider() {

  byte col = random(matWidth);
  byte row = random(matHeight);
  myScreen[(row + 0) * matWidth + col] |= B0000111;
  myScreen[(row + 1) * matWidth + col] |= B0000001;
  myScreen[(row + 2) * matWidth + col] |= B0000010;
}


void generateMatrix() {

  //Variables holding data on neighbouring cells
  unsigned long NeighbourN[matWidth], NeighbourNW[matWidth], NeighbourNE[matWidth], CurrCells[matWidth], NeighbourW[matWidth];
  unsigned long NeighbourE[matWidth], NeighbourS[matWidth], NeighbourSW[matWidth], NeighbourSE[matWidth], firstRow[matWidth];

  unsigned long tot1, tot2, tot4, carry, NewCells;

  int changes = 0; // counts the changes in the matrix
  static int prevChanges = 256; // counts the changes in the matrix on prev generation
  static int staleCount = 0; // counts the consecutive occurrances of the same number of changes in the matrix

  //set up N, NW, NE, W & E neighbour data
  //also take a copy of the first row data for use later when calculating last row
  for (byte b = 0; b < matWidth; b++) {
    NeighbourN[b] = swapBytes(myScreen[(matHeight - 1) * matWidth + b]);
    firstRow[b] = CurrCells[b] = swapBytes(myScreen[b]);
  }

  carry = NeighbourN[matWidth - 1];
  for (char b = 0; b < matWidth; b++) {
    NewCells = NeighbourN[b];
    NeighbourNW[b] = NewCells >> 1 | carry << 31;
    carry = NewCells;
  }

  carry = NeighbourN[0];
  for (char b = matWidth - 1; b >= 0; b--) {
    NewCells = NeighbourN[b];
    NeighbourNE[b] = NewCells << 1 | carry >> 31;
    carry = NewCells;
  }

  carry = CurrCells[matWidth - 1];
  for (char b = 0; b < matWidth; b++) {
    NewCells = CurrCells[b];
    NeighbourW[b] = NewCells >> 1 | carry << 31;
    carry = NewCells;
  }

  carry = CurrCells[0];
  for (char b = matWidth - 1; b >= 0; b--) {
    NewCells = CurrCells[b];
    NeighbourE[b] = NewCells << 1 | carry >> 31;
    carry = NewCells;
  }

  //Process each row of the matrix
  for (byte row = 0; row < matHeight; row++) {

    //Pick up new S, SW & SE neighbours
    if (row < matHeight - 1) {
      for (byte b = 0; b < matWidth; b++) {
        NeighbourS[b] = swapBytes(myScreen[(row + 1) * matWidth + b]);
      }
    }
    else {
      for (byte b = 0; b < matWidth; b++) {
        NeighbourS[b] = firstRow[b];
      }
    }

    carry = NeighbourS[matWidth - 1];
    for (char b = 0; b < matWidth; b++) {
      NewCells = NeighbourS[b];
      NeighbourSW[b] = NewCells >> 1 | carry << 31;
      carry = NewCells;
    }

    carry = NeighbourS[0];
    for (char b = matWidth - 1; b >= 0; b--) {
      NewCells = NeighbourS[b];
      NeighbourSE[b] = NewCells << 1 | carry >> 31;
      carry = NewCells;
    }

    for (char b = 0; b < matWidth; b++) {

      //Count the live neighbours (in parallel) for the current row of cells
      //However, if total goes over 3, we don't care (see below), so counting stops at 4
      tot1 = NeighbourN[b];
      tot2 = tot1 & NeighbourNW[b]; tot1 = tot1 ^ NeighbourNW[b];
      carry = tot1 & NeighbourNE[b]; tot1 = tot1 ^ NeighbourNE[b]; tot4 = tot2 & carry; tot2 = tot2 ^ carry;
      carry = tot1 & NeighbourW[b]; tot1 = tot1 ^ NeighbourW[b]; tot4 = tot2 & carry | tot4; tot2 = tot2 ^ carry;
      carry = tot1 & NeighbourE[b]; tot1 = tot1 ^ NeighbourE[b]; tot4 = tot2 & carry | tot4; tot2 = tot2 ^ carry;
      carry = tot1 & NeighbourS[b]; tot1 = tot1 ^ NeighbourS[b]; tot4 = tot2 & carry | tot4; tot2 = tot2 ^ carry;
      carry = tot1 & NeighbourSW[b]; tot1 = tot1 ^ NeighbourSW[b]; tot4 = tot2 & carry | tot4; tot2 = tot2 ^ carry;
      carry = tot1 & NeighbourSE[b]; tot1 = tot1 ^ NeighbourSE[b]; tot4 = tot2 & carry | tot4; tot2 = tot2 ^ carry;

      //Calculate the updated cells:
      // <2 or >3 neighbours, cell dies
      // =2 neighbours, cell continues to live
      // =3 neighbours, new cell born
      NewCells = (CurrCells[b] | tot1) & tot2 & ~ tot4;

      //Have any cells changed?
      if (NewCells != CurrCells[b]) {
        myScreen[row * matWidth + b] = swapBytes(NewCells);
        //Count the change for "stale" test
        changes++;
      }

      //Current cells (before update), E , W, SE, SW and S neighbours become
      //new N, NW, NE, E, W neighbours and current cells for next loop
      NeighbourN[b] = CurrCells[b];
      NeighbourNW[b] = NeighbourW[b];
      NeighbourNE[b] = NeighbourE[b];
      NeighbourE[b] = NeighbourSE[b];
      NeighbourW[b] = NeighbourSW[b];
      CurrCells[b] = NeighbourS[b];
    } //next col
  } //next row

  if (changes != prevChanges) staleCount = 0; else staleCount++; //Detect "stale" matrix
  if (staleCount > 32) injectGlider(); //Inject a glider

  prevChanges = changes;
}

I tried changing all the 31s to 29s, but no luck either.