Shared SRAM

I’m looking into hooking up two UNOs so they can share the same SRAM memory chip over SPI. See attached diagram.

Pin 8 on each Arduino is set as digital input. Slave select pin 10 goes to pin 8 of the other Arduino and to an AND gate.

Each Arduino is set up as a Master on the SPI bus. To avoid contention, each Arduino should check pin 8 before accessing the SRAM chip. If pin 8 is HIGH then the SPI bus is free and SS pin 10 can be set LOW to begin communication. If pin 8 is LOW then the SPI bus is busy and the Arduino needs to wait for this pin to go HIGH.

This scheme may work, but I'm worried about connecting the SCK, MISO, and MOSI pins of the two Arduinos in parallel. I checked the ATmega328 datasheet and it seems like the clock only starts just before communication starts and ends shortly after. I'm not certain about MISO and MOSI.

Any help appreciated.

I think putting some resistors on all the wires coming out of the Unos would prevent cooking the digital pins, in case

  • you have a collision when both Unos try to assert control at the same time, or
  • you have a logic problem with your sketch.

Vmaximus:
I’m looking into hooking up two UNOs so they can share the same SRAM memory chip over SPI. See attached diagram.

Pin 8 on each Arduino is set as digital input. Slave select pin 10 goes to pin 8 of the other Arduino and to an AND gate.

Each Adruino is set up as a Master on the SPI bus. To avoid contention, each Adrduio should check pin 8 before accessing the SRAM chip. If pin 8 is HIGH then the SPI bus is free and SS pin 10 can be set LOW to begin communication. If pin 8 is LOW then the SPI bus is busy and the Arduino needs to wait for this pin to go HIGH.

This scheme may work but I'm worried about connecting the pins SCK, MISO, and MOSI in parallel with each Adruino. I checked the ATmega328 datasheet and it seems like the clock only starts just before communication starts and ends shortly after. I'm not certain about MISO and MOSI.

Any help appreciated.

The different SPIModes control the Clock idle state, Either High or low. You are going to have to isolate the SPI masters from each other. Either by SPI.end() with pinMode(SCK,INPUT); pinMode(MOSI,INPUT); or use a physical Tri-Stage buffer on the SCK, MOSI pins if you put a '367 between you Arduino and connected its enable pin to the '367's EN you could use the one one chip for both Arduino's, you only need to Tri-State SCK, and MOSI, MISO is an input from the Arduino's view. And all of the Slave Devices Tri-stated it when they are not using it.

Chuck.

Hi dmjlambert & chucktodd

Thanks for your replies. Adding resistors and switching ports to tri-state is a good idea. It could keep me from blowing out ports.

I'll test using some cheap Pro Minis from AliExpress (about $2.00 each) in case I blow a port.

My update on shared SPI SRAM circuit.

My code:

// SRAM allocation: each Arduino exercises its own 100-byte region of the shared chip.

//      Name   Offset      SRAM Bytes
//-----------------------------------
#define Data1    0       //  bytes 0 to 99    (region used when Arduino1 is defined)
#define Data2    100     //  bytes 100 to 199 (region used when Arduino1 is not defined)

/*
The sketches running on the two Arduinos are almost identical.
Leave #define Arduino1 in place to build the sketch for Arduino 1.
Comment out #define Arduino1 to build the sketch for Arduino 2.
*/

#define Arduino1 1

#include <SPI.h>

// Microseconds to idle after each SRAM access; GetByte() and PutByte() pass it
// to delayMicroseconds(). Set in setup() and raised by loop() when errors occur.
unsigned long SRAM_Delay;

//---------------------------------
// Read one byte from the shared SPI SRAM at address (Offset + Index).
// Arbitrates for the bus first (pin 8 = "bus free" input, pin 10 = claim
// output), performs a 4-byte SPI exchange and returns the last byte received.
byte GetByte(int Offset, int Index)
//---------------------------------
{
volatile byte pin8;      // latest sample of PB0 (pin 8); 0 means pin 8 is LOW
unsigned int address;    // SRAM address sent as two bytes, high byte first
byte data;               // byte returned by the final SPI transfer
/*
Use direct port manipulation so each Arduino can
grab the SPI lines as soon as possible.
DDRB  - Port B data direction register (read/write).
PORTB - controls whether pins 8 to 13 are HIGH or LOW.
PINB  - reads the state of pins 8 to 13.
Arduino pin -> Port B bit:
13 B5
12 B4
11 B3
10 B2
9  B1
8  B0
B6 and B7 are connected to the crystal! Don't touch.
*/
  while(true)
  {
    pin8 = PINB & B00000001;   // sample pin 8: 0 = LOW (bus busy), non-zero = HIGH (bus free)
    while (pin8 == 0) pin8 = PINB & B00000001;   // spin until pin 8 reads HIGH
    PORTB = PORTB & B11111011; // claim the bus: drive pin 10 LOW
    delayMicroseconds(10);     // pause before re-checking pin 8
    pin8 = PINB & B00000001;   // re-check pin 8
    if (pin8 > 0) break;       // still HIGH: the claim stands, leave the loop
    PORTB = PORTB | B00000100; // pin 8 went LOW: back off (pin 10 HIGH) and retry
  }
  DDRB = DDRB | B00101000;     // drive the bus: pins 11 and 13 become outputs
  // Author's note: a small delay is needed between changing the port
  // direction and the first use of the port.
  address = Index + Offset;    // Author's note: keep this line here.
  delayMicroseconds(10);       // additional pause (author was unsure how long is needed)
  SPI.transfer(3);             // SRAM read instruction
  SPI.transfer((byte) (address >> 8));     // address high byte
  SPI.transfer((byte) (address & 0xFF));   // address low byte
  data = SPI.transfer(0);      // send a dummy 0 and capture the byte clocked back
  DDRB = DDRB & B11010111;     // release the bus: pins 11 and 13 back to inputs
  PORTB = PORTB | B00000100;   // then drive pin 10 HIGH again
  delayMicroseconds(SRAM_Delay);   // idle for SRAM_Delay microseconds after the access
  return data;
}

//---------------------------------------------
// Write one byte (value) to the shared SPI SRAM at address (Offset + Index).
// Uses the same pin 8 / pin 10 arbitration sequence as GetByte() before
// sending the 4-byte write command. Returns nothing.
void PutByte(int Offset, int Index, byte value)
//---------------------------------------------
{
volatile byte pin8;      // latest sample of PB0 (pin 8); 0 means pin 8 is LOW
unsigned int address;    // SRAM address sent as two bytes, high byte first

  while(true)
  {
    pin8 = PINB & B00000001;   // sample pin 8
    while (pin8 == 0) pin8 = PINB & B00000001;   // spin until pin 8 reads HIGH
    PORTB = PORTB & B11111011; // claim the bus: drive pin 10 LOW
    delayMicroseconds(10);     // pause before re-checking pin 8
    pin8 = PINB & B00000001;   // re-check pin 8
    if (pin8 > 0) break;       // still HIGH: the claim stands, leave the loop
    PORTB = PORTB | B00000100; // pin 8 went LOW: back off (pin 10 HIGH) and retry
  }
  DDRB = DDRB | B00101000;   // drive the bus: pins 11 and 13 become outputs
  // Author's note: a small delay is needed between changing the port
  // direction and the first use of the port.
  address = Index + Offset;  // Author's note: keep this line here.
  delayMicroseconds(10);     // additional pause before the first transfer
  SPI.transfer(2);  // SRAM write instruction
  SPI.transfer((byte) (address >> 8));     // address high byte
  SPI.transfer((byte) (address & 0xFF));   // address low byte
  SPI.transfer(value);       // the data byte to store
  DDRB = DDRB & B11010111; // release the bus: pins 11 and 13 back to inputs
  PORTB = PORTB | B00000100; // then drive pin 10 HIGH again
  delayMicroseconds(SRAM_Delay);   // idle for SRAM_Delay microseconds after the access
}

//----------
void setup()
//----------
{
byte Mode;
  
  pinMode(8, INPUT);  
  pinMode(10, OUTPUT); 
  pinMode(11, OUTPUT);
  pinMode(12, INPUT);
  pinMode(13, OUTPUT);
  
  //Set SPI chip select pins HIGH.
  digitalWrite(10, HIGH);
  
  // Begin SPI
  SPI.beginTransaction(SPISettings(20000000, MSBFIRST, SPI_MODE0));
  delay(10);
  
  Serial.begin(115200);           // start serial for output

  // Read the SRAM Mode
  digitalWrite(10,LOW);
  SPI.transfer(5);  // read mode instruction
  Mode = SPI.transfer(0);
  digitalWrite(10,HIGH);
  
  //Default Mode = 64 or Sequential
  #ifdef Arduino1  
    Serial.print(F("ARDUINO 1 ")); 
    SRAM_Delay = 100;
  #else
    Serial.print(F("ARDUINO 2 ")); 
    SRAM_Delay = 110;
  #endif
  
  Serial.print(F("SRAM Mode = ")); 
  Serial.println(Mode);
  DDRB = DDRB & B11010111; // set pins 11 and 13 as input  
}

//---------
void loop()
//---------
{
/*
Arduino 1 will write to data1 and read from data1
Arduino 2 will write to data2 and read from data2
*/

int i,j,k,l;
int data;
int ReadError,WriteError;

  #ifdef Arduino1
    data = Data1;
  #else
    data = Data2;
  #endif
  
  ReadError = 0;
  WriteError = 0;
  
  for (i = 1; i < 100; i++)
  {
    Serial.println(i);
    for (j = 0; j < 100; j++) PutByte(data,j,i); // test write
    for (j = 0; j < 100; j++) 
    {
      k = GetByte(data,j);
      if (k != i)
      {
        Serial.print(j);
        Serial.print("=");
        Serial.print(k);
        Serial.print(" ");
        l = GetByte(data,j);
        Serial.println(l);
        if (k == l) WriteError++;
        else ReadError++;
      }
    }
    delay(10);
  }
  Serial.print("With delay = ");
  Serial.print(SRAM_Delay);
  Serial.print(" ReadError = ");
  Serial.print(ReadError);
  Serial.print(" WriteError = ");
  Serial.println(WriteError);
  if((ReadError > 0) || (WriteError > 0)) SRAM_Delay = SRAM_Delay + 10;    
}

This can work with no contention.

// SRAM allocation: each Arduino exercises its own 100-byte region of the shared chip.

//      Name   Offset      SRAM Bytes
//-----------------------------------
#define Data1    0       //  bytes 0 to 99    (region used when Arduino1 is defined)
#define Data2    100     //  bytes 100 to 199 (region used when Arduino1 is not defined)

/*
The sketches running on the two Arduinos are almost identical.
Leave #define Arduino1 in place to build the sketch for Arduino 1.
Comment out #define Arduino1 to build the sketch for Arduino 2.
*/

#define Arduino1 1

#include <SPI.h>

// Set in setup(), printed and raised by loop() when errors occur.
// NOTE(review): in this version the only delay call that took this value is
// commented out in Start_SPI(), so it no longer appears to affect timing.
unsigned long SRAM_Delay;

//--------------
// Acquire the shared SPI bus. Blocks until pin 8 reads HIGH, claims the bus
// by driving pin 10 LOW, and confirms pin 8 is still HIGH after a short
// pause; otherwise backs off and retries. On return pins 11 and 13 are
// outputs and pin 10 is LOW.
void Start_SPI()
//--------------
{
volatile byte pin8;   // latest sample of PB0 (pin 8); 0 means pin 8 is LOW
/*
Use direct port manipulation so each Arduino can
grab the SPI lines as soon as possible.
DDRB  - Port B data direction register (read/write).
PORTB - controls whether pins 8 to 13 are HIGH or LOW.
PINB  - reads the state of pins 8 to 13.
Arduino pin -> Port B bit:
13 B5
12 B4
11 B3
10 B2
9  B1
8  B0
B6 and B7 are connected to the crystal! Don't touch.
*/
  while(true)
  {
    pin8 = PINB & B00000001;   // sample pin 8: 0 = LOW (bus busy), non-zero = HIGH (bus free)
    while (pin8 == 0) pin8 = PINB & B00000001;   // spin until pin 8 reads HIGH
    PORTB = PORTB & B11111011; // claim the bus: drive pin 10 LOW
    delayMicroseconds(5); // fixed 5 us pause (the leftover text suggests SRAM_Delay was passed here before)
    pin8 = PINB & B00000001;   // re-check pin 8
    if (pin8 > 0) break;       // still HIGH: the claim stands, leave the loop
    PORTB = PORTB | B00000100; // pin 8 went LOW: back off (pin 10 HIGH) and retry
  }
  DDRB = DDRB | B00101000;     // drive the bus: pins 11 and 13 become outputs
}

//-------------
void Stop_SPI()
//-------------
{
  DDRB = DDRB & B11010111;     // set pins 11 and 13 as input  
  PORTB = PORTB | B00000100;   //set pin 10 HIGH
}

//---------------------------------
// Read one byte from the shared SRAM at address (Offset + Index).
// The bus is acquired with Start_SPI() and released with Stop_SPI().
byte GetByte(int Offset, int Index)
//---------------------------------
{
  Start_SPI();

  // Per the original author's note, a short pause is needed between switching
  // the port direction and first use of the port, so the address arithmetic
  // is deliberately placed after Start_SPI().
  const unsigned int sramAddress = Index + Offset;
  const byte addressHigh = static_cast<byte>(sramAddress >> 8);
  const byte addressLow  = static_cast<byte>(sramAddress & 0xFF);

  SPI.transfer(3);                         // SRAM read instruction
  SPI.transfer(addressHigh);
  SPI.transfer(addressLow);
  const byte received = SPI.transfer(0);   // dummy write clocks the data byte in

  Stop_SPI();
  return received;
}

//---------------------------------------------
void PutByte(int Offset, int Index, byte value)
//---------------------------------------------
{
unsigned int address;

  Start_SPI();
  // Need a small delay between changing port directions
  // and fisrt time use of the port. 
  address = Index + Offset;  // Keep this line here.  
  SPI.transfer(2);  // Write instruction
  SPI.transfer((byte) (address >> 8));     
  SPI.transfer((byte) (address & 0xFF)); 
  SPI.transfer(value);
  Stop_SPI();
}

//----------
void setup()
//----------
{
byte Mode;
  
  pinMode(8, INPUT);  
  pinMode(10, OUTPUT); 
  pinMode(11, OUTPUT);
  pinMode(12, INPUT);
  pinMode(13, OUTPUT);
  
  //Set SPI chip select pins HIGH.
  digitalWrite(10, HIGH);
  
  // Begin SPI
  SPI.beginTransaction(SPISettings(20000000, MSBFIRST, SPI_MODE0));
 // In SPI_MODE0 (default the clock is normally low (CPOL = 0), and data
 // is sampled on the transition from low to high (leading edge) (CPHA = 0) 
  delay(10);
  DDRB = DDRB & B11010111; // set pins 11 and 13 as input  
   
  Serial.begin(115200);           // start serial for output

  // Read the SRAM Mode
  Start_SPI();
  SPI.transfer(5);  // read mode instruction
  Mode = SPI.transfer(0);
  Stop_SPI();
  
  //Default Mode = 64 or Sequential
  #ifdef Arduino1  
    Serial.print(F("ARDUINO 1 ")); 
    SRAM_Delay = 10;  //was 100
  #else
    Serial.print(F("ARDUINO 2 ")); 
    SRAM_Delay = 20;  // was 110
  #endif
  
  Serial.print(F("SRAM Mode = ")); 
  Serial.println(Mode);
}

//---------
void loop()
//---------
{
/*
Arduino 1 will write to data1 and read from data1
Arduino 2 will write to data2 and read from data2
*/

int i,j,k,l;
int data;
int ReadError, WriteError;

  #ifdef Arduino1
    data = Data1;
  #else
    data = Data2;
  #endif
  
  ReadError = 0;
  WriteError = 0;
   
  for (i = 1; i < 100; i++)
  {
    for (j = 0; j < 100; j++) PutByte(data,j,i); // test write
    for (j = 0; j < 100; j++) 
    {
      k = GetByte(data,j);
      if (k != i)
      {
        l = GetByte(data,j);
        if (k == l) WriteError++;
        else ReadError++;
      }
    }
  }
  Serial.print("With delay = ");
  Serial.print(SRAM_Delay);
  Serial.print(" ReadError = ");
  Serial.print(ReadError);
  Serial.print(" WriteError = ");
  Serial.println(WriteError);
  if((ReadError > 0) || (WriteError > 0)) SRAM_Delay = SRAM_Delay + 10;    
}

Could be an issue with more than one slave on the SPI bus.

You have a race condition in your description that will bite you - you indicate you will test pin 8 and if it is high, then you can have the bus. That will periodically fail when both check it at the same time, decide it is free and both start talking at the same time. Every now and then (depending on how much you are both accessing the SRAM), it will fail and will drive you nuts trying to figure out why it fails sometimes. Been there, done that - we had 2 SBP9900 CPU's and a Z80 using shared memory some years ago - used a 3 phase clock to drive the 3 processors and it worked most of the time (ran into another timing issue I was not familiar with - read recovery time on the ram chips - they were Pseudo-static chips (dynamic internally) and part of the spec said you had to leave the chip alone for a certain number of ns for it to recover. When we ran a test program to load the ram (RAM test program running on all 3 processors from the shared ram testing another part of the shared ram) it would fail. Any 2 processors were fine, but start the third one and it would fail. All in those hidden details :confused:

gpsmikey:
You have a race condition in your description that will bite you - you indicate you will test pin 8 and if it is high, then you can have the bus. That will periodically fail when both check it at the same time, decide it is free and both start talking at the same time. Every now and then (depending on how much you are both accessing the SRAM), it will fail and will drive you nuts trying to figure out why it fails sometimes. Been there, done that - we had 2 SBP9900 CPU's and a Z80 using shared memory some years ago - used a 3 phase clock to drive the 3 processors and it worked most of the time (ran into another timing issue I was not familiar with - read recovery time on the ram chips - they were Pseudo-static chips (dynamic internally) and part of the spec said you had to leave the chip alone for a certain number of ns for it to recover. When we ran a test program to load the ram (RAM test program running on all 3 processors from the shared ram testing another part of the shared ram) it would fail. Any 2 processors were fine, but start the third one and it would fail. All in those hidden details :confused:

What do you think about this schematic?

the Code to use it would be something like this:

#define CSPIN 10     // output: driven LOW by gotAccess() to request the RAM, HIGH to release it
#define GOTITPIN 8   // input: read after CSPIN goes LOW; LOW means this CPU won access

// Try once to take ownership of the shared RAM.
// Pulls CSPIN LOW, then samples GOTITPIN: LOW means arbitration was won and
// CSPIN is left LOW for the caller; otherwise CSPIN is released (HIGH).
// Returns true when access was won, false when it was lost.
bool gotAccess() {
  digitalWrite(CSPIN, LOW);

  const bool wonArbitration = !digitalRead(GOTITPIN);
  if (!wonArbitration) {
    // lost arbitration: release !CS so the winner can finish
    digitalWrite(CSPIN, HIGH);
  }
  return wonArbitration;
}


bool writeRam(uint16_t addr, char *buf, uint8_t len){
unsigned long timeout=millis();
bool ownIt=gotAccess();
while((millis()-timeout<1000)&&(!ownIt)){ // keep trying to get access for up to 1 second
  ownIt = gotAccess();
  }
if(ownIt){ // do write command
  SPI.beginTransaction(SPISettings(14000000, MSBFIRST, SPI_MODE0))
  SPI.transfer(WRITECMD);
  SPI.transfer16(addr);
  for(uint8_t i=0;i<len;i++){
    SPI.transfer(buf[i]);
    }
  digitalWrite(CSPIN,HIGH); // release the SPI ram, 
  SPI.endTransaction();
  return true; // successful write
  }
else { // timeout acquiring access
  Serial.println(' Ram Access Timeout ');
  return false; // timeout
  }
}

Now this circuit and code assume all accesses are atomic, and can be completed in one !CS access. Each time !CS goes high, you have to use gotAccess() to get the right to access the RAM chip. If both CPU's try to access it at the same time, Only the first one will have 'permission'.
The CPU that "failed" has to back out (release the !CS) and keep trying. As soon as both CPU's have released !CS the next CPU to take !CS low gains ownership.

The status on GOTITPIN is only valid after digitalWrite(CSPIN,LOW); operation.

The way the circuit works is that when both !CS inputs are HIGH, the output of the IC1A gate (NAND) is LOW. As soon as one of the !CS inputs goes LOW, gate IC1A's output goes HIGH. This rising edge on the IC7A CLK pin causes the D-Latch to store the current status of !SS2 on its Q output. IC1B is wired as an inverter; it drives the RAM chip's !CS. The 'winner' is reported by reading the status of the !OWNER outputs. Only ONE of the !OWNER outputs can be LOW at a time. The state of the D-Latch cannot change until BOTH !CS pins have gone HIGH at the same time.

Chuck.

I think that should work (my coffee has not kicked in yet this morning :confused: ). The secret is, as your circuit addresses, to get the RAM, it is a two step process in this case where it attempts to get it then verifies that it did get it. Somewhat similar actually to Ethernet where it monitors the header as it sends it and if it is corrupted, then a collision occurred and it backs off some pseudo random time and tries again. Your circuit should allow both processors to access the ram although it will slow them both down if there are lots of accesses happening. For occasional shared data storage, it should be OK. Another option is what is actually called "dual port ram" which is built to allow dual access. There are a number of flavors and speeds available although it is more expensive than regular RAM.

gpsmikey:
I think that should work (my coffee has not kicked in yet this morning :confused: ). The secret is, as your circuit addresses, to get the RAM, it is a two step process in this case where it attempts to get it then verifies that it did get it. Somewhat similar actually to Ethernet where it monitors the header as it sends it and if it is corrupted, then a collision occurred and it backs off some pseudo random time and tries again. Your circuit should allow both processors to access the ram although it will slow them both down if there are lots of accesses happening. For occasional shared data storage, it should be OK. Another option is what is actually called "dual port ram" which is built to allow dual access. There are a number of flavors and speeds available although it is more expensive than regular RAM.

I just had another thought; This same circuit could be used to indicate if the 'other' CPU has accessed the RAM. The !OWNER pin status reports who last owned the RAM. So, if one CPU access ram, the !OWNER flag for that CPU is low. It can keep monitoring that signal, or even tie an interrupt routine to this pin. The interrupt could be used to indicate the "Other" has modified the RAM. If this RAM is being used as a message buffer this could alert that new data is present.

Chuck.

gpsmikey:
You have a race condition in your description that will bite you - you indicate you will test pin 8 and if it is high, then you can have the bus. That will periodically fail when both check it at the same time, decide it is free and both start talking at the same time.

Hi gpsmikey:

Thanks for your comments, but that's not exactly how it works. Pin 10 of Arduino 1 is connected to Pin 8 of Arduino 2, and Pin 10 of Arduino 2 is connected to Pin 8 of Arduino 1. The code follows these steps:

  1. Wait until Pin 8 is High
  2. Set Pin 10 Low
  3. Wait a small amount of time
  4. Re-check Pin 8. If Low then set Pin 10 High and go back to Step 1)
  5. Else Pin 8 is High so change Pins 11 and 13 to Outputs

It's critical to have Step 3) and I've replaced delay with a NOP. The most likely failure mode is like a Goofy Gopher scene:

Both Mac and Tosh: "I'd like to use the Bus"
Mac: "Oh, I'm sorry. You go first."
Tosh: "No, I'm sorry. You go first."
Mac: "No. You go first."
Tosh "No. You go first. I insist."
....

I'm trying to figure out how much time is wasted by this Goofy Gopher scene.

The test sketch writes a byte to 100 locations in SRAM, then reads back these 100 locations to check the byte value, which should be equal to the original byte written. The test sketch does this for bytes from 1 to 100. There's a total of 100*100 = 10000 write operations and 10000 read operations.

I thought I could figure out the time wasted from the Goofy Gopher code by

time wasted = T2 - 2*T1

T2 is the amount of time it takes to execute the sketch with both Arduinos running. T1 is the amount of time it takes to execute the sketch with only one Arduino running, with no power to the second Arduino and pin 10 of that Arduino tied to Vcc.

I'm using 2 Pro Mini running at 3.3 V and 8MHz.

My updated code

// SRAM allocation: each Arduino exercises its own 100-byte region of the shared chip.

//      Name   Offset      SRAM Bytes
//-----------------------------------
#define Data1    0       //  bytes 0 to 99    (region used when Arduino1 is defined)
#define Data2    100     //  bytes 100 to 199 (region used when Arduino1 is not defined)

// Very short delay: expands to a single inline-assembly "nop" instruction.
#define NOP __asm__ __volatile__ ("nop\n\t")

/*
The sketches running on the two Arduinos are almost identical.
Leave #define Arduino1 in place to build the sketch for Arduino 1.
Comment out #define Arduino1 to build the sketch for Arduino 2.
*/

#define Arduino1 1

#include <SPI.h>

// Number of times Start_SPI() claimed the bus, found pin 8 LOW on the re-check
// and backed off. Reset at the start of each loop() pass and printed at its end.
unsigned int Contentions_Avoided;

//--------------
// Acquire the shared SPI bus. Blocks until pin 8 reads HIGH, claims the bus
// by driving pin 10 LOW, and confirms pin 8 is still HIGH after a one-NOP
// pause; otherwise backs off, counts the event in Contentions_Avoided and
// retries. On return pins 11 and 13 are outputs and pin 10 is LOW.
void Start_SPI()
//--------------
{
volatile byte pin8;   // latest sample of PB0 (pin 8); 0 means pin 8 is LOW
/*
Use direct port manipulation so each Arduino can
grab the SPI lines as soon as possible.
DDRB  - Port B data direction register (read/write).
PORTB - controls whether pins 8 to 13 are HIGH or LOW.
PINB  - reads the state of pins 8 to 13.
Arduino pin -> Port B bit:
13 B5
12 B4
11 B3
10 B2
9  B1
8  B0
B6 and B7 are connected to the crystal! Don't touch.
*/
  while(true)
  {
    pin8 = PINB & B00000001;   // sample pin 8: 0 = LOW (bus busy), non-zero = HIGH (bus free)
    while (pin8 == 0) pin8 = PINB & B00000001;   // spin until pin 8 reads HIGH
    PORTB = PORTB & B11111011; // claim the bus: drive pin 10 LOW
    NOP;                       // one-instruction pause before re-reading pin 8
    pin8 = PINB & B00000001;   // re-check pin 8
    if (pin8 > 0) break;       // still HIGH: the claim stands, leave the loop
    PORTB = PORTB | B00000100; // pin 8 went LOW: back off (pin 10 HIGH) and retry
    Contentions_Avoided++;     // count the backed-off attempt
  }
  DDRB = DDRB | B00101000;     // drive the bus: pins 11 and 13 become outputs
}

//-------------
void Stop_SPI()
//-------------
{
  DDRB = DDRB & B11010111;     // set pins 11 and 13 as input  
  PORTB = PORTB | B00000100;   // set pin 10 HIGH
}

//---------------------------------
// Read one byte from the shared SRAM at address (Offset + Index).
// The bus is acquired with Start_SPI() and released with Stop_SPI().
byte GetByte(int Offset, int Index)
//---------------------------------
{
  Start_SPI();

  // Per the original author's note, a short pause is needed between switching
  // the port direction and first use of the port, so the address arithmetic
  // is deliberately placed after Start_SPI().
  const unsigned int sramAddress = Index + Offset;
  const byte addressHigh = static_cast<byte>(sramAddress >> 8);
  const byte addressLow  = static_cast<byte>(sramAddress & 0xFF);

  SPI.transfer(3);                         // SRAM read instruction
  SPI.transfer(addressHigh);
  SPI.transfer(addressLow);
  const byte received = SPI.transfer(0);   // dummy write clocks the data byte in

  Stop_SPI();
  return received;
}

//---------------------------------------------
void PutByte(int Offset, int Index, byte value)
//---------------------------------------------
{
unsigned int address;

  Start_SPI();
  // Need a small delay between changing port directions
  // and fisrt time use of the port. 
  address = Index + Offset;  // Keep this line here.  
  SPI.transfer(2);  // Write instruction
  SPI.transfer((byte) (address >> 8));     
  SPI.transfer((byte) (address & 0xFF)); 
  SPI.transfer(value);
  Stop_SPI();
}

//----------
void setup()
//----------
{
byte Mode;
  
  pinMode(8, INPUT);  
  pinMode(10, OUTPUT); 
  pinMode(11, OUTPUT);
  pinMode(12, INPUT);
  pinMode(13, OUTPUT);
  
  //Set SPI chip select pins HIGH.
  digitalWrite(10, HIGH);
  
  // Begin SPI
  SPI.beginTransaction(SPISettings(20000000, MSBFIRST, SPI_MODE0));
 // In SPI_MODE0 (default the clock is normally low (CPOL = 0), and data
 // is sampled on the transition from low to high (leading edge) (CPHA = 0) 
  delay(10);
  DDRB = DDRB & B11010111; // set pins 11 and 13 as input  
   
  Serial.begin(115200);           // start serial for output

  // Read the SRAM Mode
  Start_SPI();
  SPI.transfer(5);  // read mode instruction
  Mode = SPI.transfer(0);
  Stop_SPI();
  
  //Default Mode = 64 or Sequential
  #ifdef Arduino1  
    Serial.print(F("ARDUINO 1 ")); 
  #else
    Serial.print(F("ARDUINO 2 ")); 
  #endif
  
  Serial.print(F("SRAM Mode = ")); 
  Serial.println(Mode);
}

//---------
void loop()
//---------
{
/*
Arduino 1 will write to data1 and read from data1
Arduino 2 will write to data2 and read from data2
*/

int i,j,k,l;
int data;
int ReadError, WriteError;
unsigned long ExecTime;
  
  #ifdef Arduino1
    data = Data1;
  #else
    data = Data2;
  #endif

  Contentions_Avoided = 0;
  ReadError = 0;
  WriteError = 0;
  ExecTime = micros();
   
  for (i = 1; i <= 100; i++)
  {
    for (j = 0; j < 100; j++) PutByte(data,j,i); // test write
    for (j = 0; j < 100; j++)                    // test read
    {
      k = GetByte(data,j);
      if (k != i)
      {
        l = GetByte(data,j);
        if (k == l) WriteError++;
        else ReadError++;
      }
    }
  }
  ExecTime = micros() - ExecTime;
  Serial.print("ExecTime = ");
  Serial.print(ExecTime);
  Serial.print(" Contentions_Avoided = ");
  Serial.print(Contentions_Avoided);
  Serial.print(" ReadError = ");
  Serial.print(ReadError);
  Serial.print(" WriteError = ");
  Serial.println(WriteError);
}

T2 is about 912.1 ms to 915.1 ms. Contentions_Avoided goes from about 30 to 100. ReadError and WriteError = 0.
T1 with Goofy Gopher code and data checking, execution time is about 604.8 ms.

time wasted = 915.1 ms - 2*604.8 ms = -294.5 ms

:astonished:

Boy, my math can be bad. Don't count on me to solve a differential equation that can save Earth from an asteroid hit.

Take 2:

With only one Arduino running:

501.2 ms with no data checking and no Goofy Gopher code. This is the minimum time needed for a single Arduino Pro Mini, running at 3.3 V and 8Mhz, to access the SRAM chip 20000 times (10000 writes and 10000 reads).

508.8 ms to access the SRAM chip 20000 times, with data checking and no Goofy Gopher code. Adding data checking adds about 7 ms.

1016.8 ms to access the SRAM chip 40000 times, with data checking and no Goofy Gopher code.

604.3 ms to access the SRAM chip 20000 times, with data checking and with Goofy Gopher code.
Adding the Goofy Gopher code adds about 100 ms.

With two Arduino running:

912.1 ms to 915.1 ms with each Arduino accessing the SRAM chip 20000 times, with data checking and Goofy Gopher code. Each Arduino needs to wait about 307.8 ms to 310.8 ms in total to access the SRAM chip 20000 times. But, there's two Arduino. The total SRAM chip access is 40000 times. While each Arduino takes more time to access the SRAM when compared with a single Arduino, having two Arduino is about 101.7 ms to 104.7 ms faster than a single Arduino to access the SRAM chip 40000 times.

I think an Arduino running at 8 MHz may not be taking full advantage of the SRAM chip, with Max speed of 20 MHz.
Results with an Arduino running at 5 V and 16 MHz may be different.

A problem with my previous circuit is that it works well with only one shared SPI slave. What about two SPI slaves: a shared SRAM slave chip and a slave accessed by only one Arduino? I think the circuit and software can still work, but the Arduino with exclusive access to the second slave would need to wait an unnecessarily long amount of time. So, I've replaced the Schottky diodes on Arduino 2 with an SN74HC125.

The updated code:

// SRAM allocation: each Arduino exercises its own 100-byte region of the shared chip.

//      Name   Offset      SRAM Bytes
//-----------------------------------
#define Data1    0       //  bytes 0 to 99    (region used when Arduino1 is defined)
#define Data2    100     //  bytes 100 to 199 (region used when Arduino1 is not defined)

// Very short delay: expands to a single inline-assembly "nop" instruction.
#define NOP __asm__ __volatile__ ("nop\n\t")
//  NOP; // delays 62.5 ns on a 16 MHz ATmega (one clock cycle)

/*
The sketches running on the two Arduinos are almost identical.
Enable #define Arduino1 to build the sketch for Arduino 1.
Leave #define Arduino1 commented out to build the sketch for Arduino 2.
*/

// Currently commented out, so this listing builds the Arduino 2 variant.
//#define Arduino1 1

#include <SPI.h>

// Number of times Start_SPI() claimed the bus, found pin 8 LOW on the re-check
// and backed off. Reset at the start of each loop() pass and printed at its end.
unsigned int Contentions_Avoided;

//--------------
// Acquire the shared SPI bus. Blocks until pin 8 reads HIGH, claims the bus
// by driving pin 10 LOW, and confirms pin 8 is still HIGH after a one-NOP
// pause; otherwise backs off, counts the event in Contentions_Avoided and
// retries. Once the bus is won, Arduino 1 switches pins 11 and 13 to outputs,
// while Arduino 2 drives pin 9 LOW instead.
void Start_SPI()
//--------------
{
volatile byte pin8;   // latest sample of PB0 (pin 8); 0 means pin 8 is LOW
/*
Use direct port manipulation so each Arduino can
grab the SPI lines as soon as possible.
DDRB  - Port B data direction register (read/write).
PORTB - controls whether pins 8 to 13 are HIGH or LOW.
PINB  - reads the state of pins 8 to 13.
Arduino pin -> Port B bit:
13 B5
12 B4
11 B3
10 B2
9  B1
8  B0
B6 and B7 are connected to the crystal! Don't touch.
*/
  while(true)
  {
    pin8 = PINB & B00000001;   // sample pin 8: 0 = LOW (bus busy), non-zero = HIGH (bus free)
    while (pin8 == 0) pin8 = PINB & B00000001;   // spin until pin 8 reads HIGH
    PORTB = PORTB & B11111011; // claim the bus: drive pin 10 LOW
    NOP;                       // one-instruction pause before re-reading pin 8
    pin8 = PINB & B00000001;   // re-check pin 8
    if (pin8 > 0) break;       // still HIGH: the claim stands, leave the loop
    PORTB = PORTB | B00000100; // pin 8 went LOW: back off (pin 10 HIGH) and retry
    Contentions_Avoided++;     // count the backed-off attempt
  }
  #ifdef Arduino1
    DDRB = DDRB | B00101000;     // drive the bus: pins 11 and 13 become outputs
  #else
    PORTB = PORTB & B11111101;   // set pin 9 LOW (presumably the SN74HC125 output enable - confirm against the schematic)
  #endif
}

//-------------
void Stop_SPI()
//-------------
{
  #ifdef Arduino1
    DDRB = DDRB & B11010111;     // set pins 11 and 13 as input  
    PORTB = PORTB | B00000100;   // set pin 10 HIGH
  #else
    PORTB = PORTB | B00000110;   // set pins 9 and 10 HIGH
  #endif
}

//---------------------------------
// Read one byte from the shared SRAM at address (Offset + Index).
// The bus is acquired with Start_SPI() and released with Stop_SPI().
byte GetByte(int Offset, int Index)
//---------------------------------
{
  Start_SPI();

  // Per the original author's note, a short pause is needed between switching
  // the port direction and first use of the port, so the address arithmetic
  // is deliberately placed after Start_SPI().
  const unsigned int sramAddress = Index + Offset;
  const byte addressHigh = static_cast<byte>(sramAddress >> 8);
  const byte addressLow  = static_cast<byte>(sramAddress & 0xFF);

  SPI.transfer(3);                         // SRAM read instruction
  SPI.transfer(addressHigh);
  SPI.transfer(addressLow);
  const byte received = SPI.transfer(0);   // dummy write clocks the data byte in

  Stop_SPI();
  return received;
}

//---------------------------------------------
void PutByte(int Offset, int Index, byte value)
//---------------------------------------------
{
unsigned int address;

  Start_SPI();
  // Need a small delay between changing port directions
  // and fisrt time use of the port. 
  address = Index + Offset;  // Keep this line here.  
  SPI.transfer(2);  // Write instruction
  SPI.transfer((byte) (address >> 8));     
  SPI.transfer((byte) (address & 0xFF)); 
  SPI.transfer(value);
  Stop_SPI();
}

//----------
void setup()
//----------
{
byte Mode;
  
  pinMode(8, INPUT);
  #ifndef Arduino1  
    pinMode(9,OUTPUT);
  #endif
  pinMode(10, OUTPUT); 
  pinMode(11, OUTPUT);
  pinMode(12, INPUT);
  pinMode(13, OUTPUT);
  
  //Set SPI chip select pins HIGH.
  digitalWrite(10, HIGH);
  
  // Begin SPI
  SPI.beginTransaction(SPISettings(20000000, MSBFIRST, SPI_MODE0));
 // In SPI_MODE0 (default the clock is normally low (CPOL = 0), and data
 // is sampled on the transition from low to high (leading edge) (CPHA = 0) 
  delay(10);
  #ifdef Arduino1
    DDRB = DDRB & B11010111; // set pins 11 and 13 as input  
  #else
    digitalWrite(9, HIGH);
  #endif
   
  Serial.begin(115200);           // start serial for output

  // Read the SRAM Mode
  Start_SPI();
  SPI.transfer(5);  // read mode instruction
  Mode = SPI.transfer(0);
  Stop_SPI();
  
  //Default Mode = 64 or Sequential
  #ifdef Arduino1  
    Serial.print(F("ARDUINO 1 Schottky ")); 
  #else
    Serial.print(F("ARDUINO 2 Buffer ")); 
  #endif
  
  Serial.print(F("SRAM Mode = ")); 
  Serial.println(Mode);
}

//---------
void loop()
//---------
{
/*
Arduino 1 will write to data1 and read from data1
Arduino 2 will write to data2 and read from data2
*/

int i,j,k,l;
int data;
int ReadError, WriteError;
unsigned long ExecTime;
  
  #ifdef Arduino1
    data = Data1;
  #else
    data = Data2;
  #endif

  Contentions_Avoided = 0;
  ReadError = 0;
  WriteError = 0;
  ExecTime = micros();
   
  for (i = 1; i <= 100; i++)
  {
    for (j = 0; j < 100; j++) PutByte(data,j,i); // test write
    for (j = 0; j < 100; j++)                    // test read
    {
      k = GetByte(data,j);
      if (k != i)
      {
        l = GetByte(data,j);
        if (k == l) WriteError++;
        else ReadError++;
      }
    }
  }
  ExecTime = micros() - ExecTime;
  Serial.print("ExecTime = ");
  Serial.print(ExecTime);
  Serial.print(" Contentions_Avoided = ");
  Serial.print(Contentions_Avoided);
  Serial.print(" ReadError = ");
  Serial.print(ReadError);
  Serial.print(" WriteError = ");
  Serial.println(WriteError);
}