Two Arduinos over fake SPI (bit banging) sync problem

I want to transmit a command using a fake (bit-banged) SPI protocol and receive the response accordingly.
The slave uses the hardware SPI peripheral and replies with 0x80 whenever it receives 0x40.
Here is the code:

Real SPI Slave:

#include <SPI.h>

char buf [100];
volatile byte pos;
volatile byte c;
volatile boolean process_it;
volatile boolean led, reply;

void setup (void)
{
  Serial.begin (115200);
  Serial.println("Setup Done! MEGA _ Slave Real SPI");
  pinMode(13,OUTPUT);
  // have to send on master in, *slave out*
  pinMode(MISO, OUTPUT);

  // turn on SPI in slave mode
  SPCR |= _BV(SPE);

  // get ready for an interrupt
  pos = 0; 
  process_it = false;
  
  SPI.begin();
  // now turn on interrupts
  SPI.attachInterrupt();
}

// SPI interrupt routine
ISR (SPI_STC_vect)
{
  c = SPDR;  // grab byte from SPI Data Register
  process_it = true;
  if (c==0x40)
     {
     reply = true;
     }
}

void loop (void)
{
  if (led )
    {
    digitalWrite(13,!digitalRead(13));
    led = false;
    }
  
  if (process_it)
    {
    Serial.println (c);
    process_it = false;
    }
  
  if (reply)
    {
      SPI.transfer(0x80);
      led = true; reply = false;
    }          
}

And the Master Fake SPI Code:

#define NOP __asm__ __volatile__ ("nop\n\t")

int CS = 7;      // Chip Select
int MSI = 6;     // MOSI _ Master Out Slave In
int MSO = 5;     // MISO _ Master In Slave Out
int CLK  = 4;    // Clock for fake SPI

void setup()
{
  Serial.begin(115200);
  Serial.println("Setup Done! UNO SPI Master (Bit Bang!)");
  
  pinMode(CS,OUTPUT);     
  pinMode(MSI,OUTPUT);    // MOSI
  pinMode(MSO,INPUT);      // MISO
  pinMode(CLK,OUTPUT);
  
  digitalWrite(CS,LOW);
}

void loop()
{
  byte c=0;
  
 // Serial.println(0x40);
  SPI_Send(0x40);
  c = SPI_Receive();
  Serial.print("Receive = ");
  Serial.println(c);
 
  delay(1000);
}

void SPI_Send(char data)
{
  byte mask = 0x80;
  
  //for 8 data bits:
  
  for (int i=0;i<8;i++)
  {
    digitalWrite(CLK,LOW);      // drive clock low
    delay(10);    // delay = SPI_Freq/2
    
    if (data & mask)            
      {digitalWrite(MSI,HIGH);}    // MOSI High/low
    else
      {digitalWrite(MSI,LOW);}
         
    NOP;NOP;NOP;NOP;  // delay to stabilize the MOSI pin
    
    digitalWrite(CLK,HIGH);    // drive clock high
    
    delay(10);    // delay = SPI_Freq/2;
    
    mask = mask >> 1;      // 1 time shift    
  }
} 

char SPI_Receive()
{
  byte response = 0;
  
  for (int i=0;i<8;i++)
    {
     response <<= 1;  
     digitalWrite(CLK,LOW);    // drive clock low
     delay(10);  // SPI Delay/2
    
     if (digitalRead(MSO))  // Scan the input pin
        response |= 1;
    
     delay(10);  // SPI Delay/2
     digitalWrite(CLK,HIGH);   // drive clock high
    }
  return response;
}

The slave's serial monitor shows something like this (64 = 0x40):
64,64,0,64,0,0,64,64,0,0,64,0,64,0,64
It catches some bytes and misses others.

The master's serial monitor shows (128 = 0x80):
Receive = 64,64,64,128,64,128,128,64,128,64,64,64,
even though the slave only ever sends 128 (0x80).

How can I make this work correctly?

It appears your slave is in SPI mode 0, while your master looks like it is using mode 3. The clock polarity may be incorrect: it should idle LOW and pulse HIGH, but yours is the other way around.

Also, the slave select (CS) line is usually what keeps the two ends in sync, and I see no manipulation of that line.

Edit: Mode 0 captures data on the rising edge of the clock and propagates (changes) it on the falling edge. When the falling edge comes first, what would normally be bit 7 on the first rising edge becomes bit 6 instead. That would turn 64 into 128, would it not?

You should also change these delay() calls to delayMicroseconds():

    delay(10);    // delay = SPI_Freq/2
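For illustration, here is a minimal sketch of what a mode-0 bit-banged send could look like, assuming the same MSI/CLK pin names and a 10 µs half-period as in your code (both are assumptions, not requirements): the clock idles LOW, MOSI changes while the clock is low, and the slave captures the bit on the rising edge.

// Minimal sketch of a mode-0 bit-banged send (assumed pin names MSI/CLK and
// a 10 us half-period, i.e. roughly a 50 kHz clock).
void SPI_Send(byte data)
{
  for (byte mask = 0x80; mask; mask >>= 1)
  {
    digitalWrite(MSI, (data & mask) ? HIGH : LOW);  // change data while the clock is low
    delayMicroseconds(10);                          // let MOSI settle for half a period

    digitalWrite(CLK, HIGH);                        // rising edge: slave captures MOSI
    delayMicroseconds(10);                          // hold for half a period

    digitalWrite(CLK, LOW);                         // falling edge: back to idle, data may change
  }
}

The receive loop would mirror this: sample MISO right after driving the clock HIGH, then return the clock LOW before the next bit.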

I have changed the slave to mode 3, but it doesn't change anything.

Are you manipulating the CS line now? It should look something like this:

// Send start of transfer signal
digitalWrite(CS,LOW);
// wait 1us for the slave to get ready
delayMicroseconds(1);

// do your bit banging transfer here

// send end of transfer signal
digitalWrite(CS,HIGH);
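As a usage sketch (reusing the SPI_Send()/SPI_Receive() and pin names from the question; the 1 µs setup time and 1 s pause are arbitrary assumptions), loop() would then frame every exchange with CS, and setup() should drive CS HIGH so the slave starts out deselected:

// Sketch of loop() with each transfer framed by CS. setup() is assumed to end
// with digitalWrite(CS, HIGH) so the slave idles deselected.
void loop()
{
  digitalWrite(CS, LOW);        // start of transfer: select the slave
  delayMicroseconds(1);         // give the slave a moment to get ready

  SPI_Send(0x40);               // command byte
  byte c = SPI_Receive();       // expected reply: 0x80

  digitalWrite(CS, HIGH);       // end of transfer: deselect the slave

  Serial.print("Receive = ");
  Serial.println(c);
  delay(1000);
}

On the hardware-SPI slave, this CS line would normally be wired to its SS pin, since the AVR SPI peripheral only clocks data in while SS is held low.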