16-bit sample & store, then readback & play, finally store to SD

I'm thinking about how to make a 16-bit mono sound sampling system, with playback of the samples sound, and storage to SD card if playback is okay.
Will then take the SD card & put it in a PC for some post-capture manipulation.

My planned architecture - AD7680 16 bit ADC for audio capture, Atmega1284 for control, a bank of 8 23LC1024 128Kx8 SRAMs for intermediate, and for playback a AD5662 16 bit DAC. And SD card for "longterm" storage.

I have this code done up for capture & playback. Any chance of it working fast enough to sample at 44,100? If not, what can be improved?
Datasheets & .ino file attached.

/* Crossroads, 26 July 2013
test for reading from 16 bit ADC
AD7680, needs 24 bit read, max SPI speed 2.5MHz
write read data to bank of 23LC1024 SRAM, eight 128K x 8 SRAM
((128 * 1024)bytes/bank * 8 banks)/88200 bytes/sec = 11.88S
Need two bytes/sample for 16-bit sampling
Needs to be realtime

playback, read SRAM, write to AD5662B (A?) DAC, write as 24 bit sample
Needs to be realtime

Storage - store on SD card once decide sample is good, move to PC for cleanup, trimming, etc.  Doesn't need to be realtime.
*/
// pins 0,1 used for Serial
#include <SPI.h>
// pins 11,12,13 used for SPI (MOSI, MISO, SCK)
#include <SdFat.h>
// Pin assignments, using the Bobuino-style D numbers from the pinout diagram
// in this sketch.
byte csSD = 10; // SD card chip select
byte csADC = 7; // ADC chip select
byte csDAC = 6 ; // DAC chip select
byte csSRAM[] = {22,23,24,25,26,27,28,29};  // SRAM chip selects, one per bank (port C bits 0-7)
byte recordButton = 14;
byte endButton = 15;
byte playbackButton = 16;
byte storeSDButton = 17; // need autonaming somehow
byte tbd18Button = 18;
byte tbd19Button = 19;
byte tbd20Button = 20;
byte activeLed = 21; // light up to show recording, playback, storage
// Unused pins; setup() parks them as inputs with pull-ups.
// NOTE(review): the name `free` may collide with the C library's free() -
// if the compiler complains, rename it here and in setup() together.
byte free[] = {4,5,8,9,30,31,};


// ATMEL ATMEGA1284P on Bobuino & Project usage
//
//                    +---\/---+
//  free  (D 4) PB0 1 |        | 40 PA0 (D 21) AI 7  activeLED
//  free  (D 5) PB1 2 |        | 39 PA1 (D 20) AI 6  tbd20
//  csDAC (D 6) PB2 3 |INT2    | 38 PA2 (D 19) AI 5  tbd19
//  csADC (D 7) PB3 4 |PWM     | 37 PA3 (D 18) AI 4  tbd18
//  csSD (D 10) PB4 5 |PWM     | 36 PA4 (D 17) AI 3  storeSD
//  MOSI (D 11) PB5 6 |        | 35 PA5 (D 16) AI 2  playback
//  MISO (D 12) PB6 7 |PWM     | 34 PA6 (D 15) AI 1  end
//   SCK (D 13) PB7 8 |PWM     | 33 PA7 (D 14) AI 0  record
//              RST 9 |        | 32 AREF
//             VCC 10 |        | 31 GND 
//             GND 11 |        | 30 AVCC
//           XTAL2 12 |        | 29 PC7 (D 29) SRAM7
//           XTAL1 13 |        | 28 PC6 (D 28) SRAM6
//   RX0 (D 0) PD0 14 |     TDI| 27 PC5 (D 27) SRAM5
//   TX0 (D 1) PD1 15 |     TDO| 26 PC4 (D 26) SRAM4
//   RX1 (D 2) PD2 16 |INT0 TMS| 25 PC3 (D 25) SRAM3
//   TX1 (D 3) PD3 17 |INT1 TCK| 24 PC2 (D 24) SRAM2
//  free(D 30) PD4 18 |PWM  SDA| 23 PC1 (D 23) SRAM1
//  free (D 8) PD5 19 |PWM  SCL| 22 PC0 (D 22) SRAM0
//  free (D 9) PD6 20 |PWM  PWM| 21 PD7 (D 31) free
//                    +--------+
//
byte x; // scratch loop index used by setup()

unsigned long currentTime;  // micros() reading taken at the top of each sample loop pass
unsigned long previousTime; // time the previous sample was scheduled for (was used but never declared)
unsigned long duration;
unsigned long memFree = 524288; // 8 banks * 128K bytes / 2 bytes per sample
byte recording; // 1 while the record loop is running
byte playing;   // 1 while the playback loop is running
byte storing;   // 1 while the store-to-SD loop is running
byte buttons;   // snapshot of PINA
// Individual button bits split out of `buttons`; 0 means pressed (pull-ups).
// These were assigned in loop() without ever being declared.
byte startStatus;
byte endStatus;
byte playStatus;
byte storeStatus;
// Raw 24-bit ADC frame, adc2 first on the wire.
byte adc0;
byte adc1;
byte adc2;
// 16-bit sample split into bytes, on its way to SRAM (ADC) or to the DAC.
// Also previously undeclared.
byte upperADC;
byte lowerADC;
byte upperDAC;
byte lowerDAC;
byte SRAMbank = B11111110; // first bank
byte SRAMaddress0; // LSB
byte SRAMaddress1; // middle
byte SRAMaddress2; // MSB

/*********************************************/

void setup(){
  //Serial.begin (115200); // for debug testing
pinMode (csSD, OUTPUT);
pinMide (csADC, OUTPUT);
pinMode (csDAC, OUTPUT);
for (x=0; x<8; x=x+1){
pinMode (csSRAM[x], OUTPUT);
}
for (x=0; x<6; x=x+1){
pinMode (free[x], INPUT_PULLUP);
}
pinMode (recordButton, INPUT_PULLUP);
pinMode (endButton, INPUT_PULLUP);
pinMode (playbackButton, INPUT_PULLUP);
pinMode (storeSDButton, INPUT_PULLUP);
pinMode (activeLed, OUTPUT);

SPI.begin();
set SPI Speed to 2 MHz (divide by 4)

} // end setup

/*******************************************/

void loop(){
buttons = PINA;  // read buttons port & split out the bits
startStatus = buttons & B10000000;
endStatus = buttons & B01000000; 
playStatus = buttons & B0010000;
storeStatus = buttons & B00010000;

// start recording, or keep recording
// 44,100 rate = sample every 362 clocks, 22.6uS
if ( (((startStatus == 0) && (endStatus != 0) && (playStatus != 0) && (storeStatus != 0) ) && (recording == 0)) ){
recording = 1; // turn on recording flag
SRAMaddress0 = 0; // set SRAM start address
SRAMaddress1 = 0; // set SRAM start address
SRAMaddress2 = 0; // set SRAM start address
SRAMbank = B11111110; // start at first chip
PORTA = PORTA | B00000001; // turn on status LED
}

while (recording == 1){  // stay here until memory is full, or end is pressed
currentTime = micros();
 if ( currentTime - previousTime) <=22){ 
 previousTime = previousTime + 22;    
 // access ADC
 PORTB = PORTB & B11110111; // clear csADC
 adc2 = SPI.transfer(0); // MSB 0000 xxxx
 adc1 = SPI.transfer(0); //     xxxx xxxx
 adc0 = SPI.transfer(0); //     xxxx 0000 LSB
 PORTB = PORTB | B00001000; // set csADC
 // clean up into 2 bytes
 upperADC = (adc2 <<4) + (adc1 >> 4);
 lowerADC = (adc1 <<4) + (adc0 >> 4);
 // write to SRAM
 PORTC = SRAMbank; // clear SRAM chip select 0 to 7
 SPI.transfer (SRAMaddress2);
 SPI.transfer (SRAMaddress1);
 SPI.transfer (SRAMaddress0);
 SPI.transfer (upperADC);
 SPI.transfer (lowerADC);
 PORTC = 0xFF; // set SRAM chip select 0 to 7
 // update for next address and/or bank
 SRAMaddress0 = SRAMaddress0+1;
  if (SRAMaddress0 == 0){
    // lower address rolled over, increment middle address
    SRAMaddress1 = SRAMaddress1 +1; // 
    if (SRAMaddress1 == 0){
        // middle address rolled over, increment upper address
        SRAMaddress2 = SRAMaddress2 + 1; // 
        if (SRAMaddress2 == 2){ //gone thru all addresses,rollover to next bank
            SRAMaddress2 = 0; // clear upper bit, shift the 0 for next cs bit
            SRAMbank = SRAMbank <<1 + 0x01; // B11111110 -> B11111101, etc.
            if (SRAMbank == 0xFF){  // shifted a 0 across all outputs
                recording = 0; // Done!
                PORTA = PORTA & B11111110; // turn off status LED
            } // SRAM bank rollover
        } // upper address rollover
     } // middle address rollover
   } // lower address rollover
  // check endButton, stop if pressed
     if ((PINA & B01000000) == 0){
           recording = 0;
     }
  } // time check
} // while record/store loop

AD7680 16bit ADC SPI.pdf (295 KB)

23LC1024 128Kx8 SRAM.pdf (481 KB)

AD5662 DAC 16 bit.pdf (578 KB)

sample_playback_store.ino (8.6 KB)

2nd part of code

/***************/
// start playback
if ( (startStatus != 0) && (endStatus != 0) && (playStatus == 0) && (storeStatus != 0) || playing == 0){
playing = 1;
SRAMaddress0 = 0; // set SRAM start address
SRAMaddress1 = 0; // set SRAM start address
SRAMaddress2 = 0; // set SRAM start address
SRAMbank = B11111110; // start at first chip
PORTA = PORTA | B00000001; // turn on status LED
}
while (playing == 1){ // stay here until playback is done, or end is pressed
currentTime = micros();
 if ( currentTime - previousTime) <=22){   
 previousTime = previousTime + 22;                       
 // read SRAM
 PORTC = SRAMbank; // clear SRAM chip select 0 to 7
 SPI.transfer (SRAMaddress2);
 SPI.transfer (SRAMaddress1);
 SPI.transfer (SRAMaddress0);
 upperDAC = SPI.transfer (0);
 lowerDAC = SPI.transfer (0);
 PORTC = 0xFF; // set SRAM chip select 0 to 7
 
// write data to DAC 000000xx xxxxxxxx xxxxxxxx
 // first 2 bits are 0 for normal operations
 PORTB = PORTB & B11111011; // clear csDAC
 SPI.transfer(0);
 SPI.transfer (upperDAC);
 SPI.transfer (lowerDAC);
 PORTB = 0xFF; // clear csDAC
 
 // update for next address and/or bank
 SRAMaddress0 = SRAMaddress0+1;
  if (SRAMaddress0 == 0){
    // lower address rolled over, increment middle address
    SRAMaddress1 = SRAMaddress1 +1; 
    if (SRAMaddress1 == 0){
        // middle address rolled over, increment upper address
        SRAMaddress2 = SRAMaddress2 + 1; 
        if (SRAMaddress2 == 2){ // gone thru all addresses,rollover to next bank
            SRAMaddress2 = 0; // clear upper bit, shift the 0 for next cs bit
            SRAMbank = SRAMbank <<1 + 0x01; // B11111110 -> B11111101, etc.
            if (SRAMbank == 0xFF){  // shifted a 0 across all outputs
                playing = 0; // Done!
                PORTA = PORTA & B11111110; // turn off status LED
            } // SRAM bank rollover
        } // upper address rollover
     } // middle address rollover
   } // lower address rollover
  // check endButton, stop if pressed
     if ((PINA & B01000000) == 0){
           playing = 0;
           // leave DAC at 2.5V output?
           // first 2 bits are 0 for normal operations
           PORTB = PORTB & B11111011; // clear csDAC
           SPI.transfer(0); // 
           SPI.transfer (0x00); // upperDAC
           SPI.transfer (0xFF); // lowerDac
           PORTB = 0xFF; // clear csDAC
     }
  } // time check
} // while read/play loop


/***************/
// store to SD card - use sdfat16, haven't worked out how best to do this
if ( (startStatus != 0) && (endStatus == 0) && (playStatus != 0) && (storeStatus != 0) || storing == 1 ){
storing = 1;
PORTA = PORTA | B00000001; // turn on status LED
}
while (storing ==1){
// etc.
// auto create next file name, open file, store data from SRAM, close file
// 8 x 128K = 1M files
// store it all, clean up sample beginning, trim off ends in the PC process
}

} // end loop

It would probably be better to put recording and playback into a timer interrupt to avoid other interrupts causing unwanted glitches in your sound.

Interrupts occurring from what? There will be nothing else running while recording or playing back.

How does micros() work?

FYI, my experience with the SD.h library was that it reads at around 14KB/s, but sdfat should be faster. Playback from SD card may be hard.

Arduino due has audio out. I wonder if its library may provide some insight for your project. My last project with sound was while back when they released some tech notes on sound blaster (TM). PCs have enough memory and DMA :slight_smile:

CrossRoads:
I have this code done up for capture & playback. Any chance of it working fast enough to sample at 44,100?

First, some sanity checks...

16000000 cycles per second / (44100 samples per second * 2 bytes per sample) = 181 cycles per byte, i.e. about 362 cycles per 16-bit sample. Should be enough processor time.

// set SPI Speed to 2 MHz (divide by 4)
2000000 bits per second / 16 bits per value = 125000 values per second. Should be enough bandwidth.

A quick and simple optimization...

const byte csSD = 10; // SD card chip select
const byte csADC = 7; // ADC chip select
const byte csDAC = 6 ; // DAC chip select
...

So you're suggesting disabling millis() and micros() and somehow making a 22.6uS timer interrupt that will perhaps set a flag, and when the code sees the flag set then do an ADC sample and store to SRAM, or alternately an SRAM read out and write out to the DAC?
I don't know what's involved in making that timer interrupt.

My plan was to use Sdfat.h:
#include (SDfat.h>
I am not interested in playback from SD, only playback from SRAM to confirm the sampled sound. When satisfied with the sample, then move it from SRAM to SD card for further manipulation on a PC.
Final goal is electronic drums, standalone from any PC.
Or possibly an inexpensive netbook for loading cleaned up sounds into SRAM or FRAM, one bank/drum. And then just playback from SRAM/FRAM into DAC.

const - does that help with speed?

CrossRoads:
const - does that help with speed?

Yes. Each const should eliminate one to a half-dozen machine instructions.

Just referring to:

Interrupts occurring from what? There will be nothing else running while recording or playing back.

I.e. the micros() you are using is implemented with a timer interrupt. I don't know if that results in any audible artifacts (it depends on your uC and the micros() interrupt handler), but with a timer interrupt you can guarantee the frequency of your output without clicks and snaps in the sound. Your interrupt handler should just push the audio samples to the DAC, i.e. essentially the code you have inside the if-statement that checks the 22uS delta time should go there.

So instead of this

currentTime = micros();
 if ( currentTime - previousTime) <=22){   
 previousTime = previousTime + 22;

do something that waits for a timer interrupt instead?

Heading out to play, will see what happens.

I came across Nick Gammon's page on timers

not sure I can get 44,100 no matter what I do.
Maybe set it for 50,000, see how long the code takes, and add some no-ops to finish slowing it down before enabling the interrupt again?

Will start with a baseline, see what it looks like ...

do something that waits for a timer interrupt instead?

No, I mean write your own interrupt handler and put the code there. Like I did in this simple audio sample player

Okay, I've got this thing free-running, going flat out with no timing measurements or interrupts in the code, just whatever millis() and micros() are doing in the background. Results from micros() captured just before sampling starts and right after it ends, and the same for playback:

Start recording
done from record memory
14696056 (uS)
Start playback
done from playback memory
13706116 (uS)

Total memory accessed: 8 * 128 *1024 = 1048576 bytes/2 = 524,288 words / 14.696 seconds = 35,675 16-bit samples/second while recording.
How do I squeeze a few more seconds out of this? I want to be down to 11.888 seconds, so 2.808 seconds faster.

Reading out, it's a little quicker because the 16 bits of data can go right from memory into the DAC, the 16 bits don't need to be pulled from the middle of the 24 bits out of the ADC.
524,288/13.706 = 38,252 samples/second. So I want 1.818 seconds improvement there.

The numbers above assume ADC, DAC, and memory running 8 MHz and not the 2MHz I started with, that was a big improvement:

Start recording
done from record memory
39953364 (uS)
Start playback
done from playback memory
38950760 (uS)

If I could find ADC & DAC that only needed 2 byte transfers to read out & write in, that would help:

Start recording
done from record memory
12123192 (uS)
Start playback
done from playback memory
12064720 (uS)

524,288/12.123sec = 43,246 samples/sec. Close to CD quality.

What else can I do to get faster? I don't know if 16 bit, 2-byte transfers ADC/DAC even exist, the choices were getting limited at 16 bit already, hence the AD7680 & AD5662B (datasheets posted earlier).

Here's the capture loop, full code is attached below:

  while (recording == 1){  // stay here until memory is full, or end is pressed
    //currentTime = micros();
    //if ( ( currentTime - previousTime) >=duration){ 
    //  previousTime = previousTime + duration;    
      // access ADC
      PORTB = PORTB & B11110111; // clear csADC
      adc2 = SPI.transfer(0); // MSB 0000 xxxx
      adc1 = SPI.transfer(0); //     xxxx xxxx
      //adc0 = SPI.transfer(0); //     xxxx 0000 LSB
      PORTB = PORTB | B00001000; // set csADC - csADC is toggling
      // clean up into 2 bytes
      //upperADC = (adc2 <<4) + (adc1 >> 4);
      //lowerADC = (adc1 <<4) + (adc0 >> 4);

      // write to SRAM
      //Serial.print(" sram sel ");
      //Serial.println (SRAMbank, HEX);
      PORTC = SRAMbank; // clear SRAM chip select 0 to 7
      SPI.transfer (SRAMaddress2);
      SPI.transfer (SRAMaddress1);
      SPI.transfer (SRAMaddress0);
      SPI.transfer (upperADC);
      SPI.transfer (lowerADC);
      PORTC = 0xFF; // set SRAM chip select 0 to 7
      // update for next address and/or bank
      SRAMaddress0 = SRAMaddress0+1;
      if (SRAMaddress0 == 0){
        // lower address rolled over, increment middle address
        //Serial.println ("low address roll");
        SRAMaddress1 = SRAMaddress1 +1; // 
        if (SRAMaddress1 == 0){
          // middle address rolled over, increment upper address
          //Serial.println (" middle address roll");
          SRAMaddress2 = SRAMaddress2 + 1; // 
          if (SRAMaddress2 == 2){ //gone thru all addresses,rollover to next bank
            SRAMaddress2 = 0; // clear upper bit, shift the 0 for next cs bit
            //Serial.print ("Sb ");
            SRAMbank = (SRAMbank <<1) + 0x01; // B11111110 -> B11111101, etc.
            //Serial.println (SRAMbank,BIN);
            if (SRAMbank == 0xFF){  // shifted a 0 across all outputs
            endTime = micros();
              recording = 0; // Done!
              Serial.println ("done from record memory");
              Serial.println (endTime - startTime);
              
              PORTA = PORTA & B11111110; // turn off status LED
            } // SRAM bank rollover
          } // upper address rollover
        } // middle address rollover
      } // lower address rollover
      // check endButton, stop if pressed - this read works
      //buttons = PINA;
      //if ((PINA & B01000000) == 0){
      //  recording = 0;
      //  Serial.println ("done from record end");
      //  PORTA = PORTA & B11111110; // turn off status LED
      //}
    //} // time check
  } // while record/store loop

sample_playback_store.ino (10.7 KB)

I don't understand your read/write of SRAM. I have used 23LCV1024 SRAM, the battery backup NVRAM version, and it requires four bytes to send an instruction and address. It appears that the 23LC1024 requires the same sequence.

See Figure 2-6 of the 23A1024/23LC1024 data sheet. You must send a 0X02 instruction byte before the three byte address for a write.

Reading and writing two bytes at a time to the SRAM is very slow since it requires a transfer of six bytes total.

I have used the SRAM to read and save an SPI ADC at high rate. I read the ADC in an ISR for timer 1 using bit-bang and store the data in 512 byte buffers. In loop() I write buffers to SRAM in 512 byte chunks. I wrote an optimized driver to write the SRAM, not the Arduino SPI library. The driver can write a 512 byte block to SRAM in about 800 microseconds or about 1.56 microseconds per byte.

You can't run the AD7680 at 8 MHz since the SPI clock is used for the SAR ADC and the max clock rate is 2.5 MHz. See Table 4 of the AD7680 data sheet. The SPI clock must be between 250kHz and 2.5 MHz.

My approach for SPI ADC/DACs is to write a bit-bang driver for the ADC/DAC and connect them to their own pins. I use the AVR SPI controller for the mass storage device.

I used this method in a library for the Adafruit Wave Shield http://code.google.com/p/wavehc/downloads/list. It uses a 12-bit DAC so it only plays 12 of 16 bits but it can play 16-bit 44.1 ksps Wave files from an SD.

It looks like a bit-bang driver would only require 20 clocks to read the AD7680 ADC, see . I tend to get about a 2 MHz clock rate in optimized bit-bang SPI drivers so that would be 10 microseconds per sample. It should be possible but would require tricky programming. See Figure 21. AD7680 Serial Interface Timing Diagram—20 SCLK Transfer in the data sheet.

Yeah, you're correct - I missed the sentence in the datasheet that describes in figures 2-5 and 2-5 (now that I've re-read it) the instruction bytes 0b00000010 & 0b00000011 before the 3 address byte and then 1 or more data bytes. Adding another byte only slows it down some more tho.

I realize I can't run the AD7680 that fast, I mentioned needing a faster ADC so I can use 8 MHz SPI.

Moving up to specialized code to achieve 22.676uS data rate is what I'm after. I could use the help in getting there. Once past direct port manipulation and SPI for speed, I'm kinda stuck.

I don't need to play back from the card, I'll do that at the PC as I clean up the sample. I only want to playback the SRAM to hear the sample on the spot and decide if it's good enough to put on the SD card.

I'll look at some other 16 bit ADC to find a faster one. I really want to reach 44,100 words/sec rate to sample & store from ADC to SRAM, and play from SRAM to DAC.

I was thinking today that parallel SRAM might be an option too.
19 address bits,
16 data bits,
3 control lines,
4 lines for serial ADC/DAC,
Hmm. Use external counter that just need reset and clock line vs 19 address lines, single toggle to clock it up one.

I was able to send out 41 bytes of data really fast doing 41 lines of this:

    // Unrolled SPI burst: pull the select line low, push bytes straight into
    // SPDR, then raise the select line again.
    // Each SPDR write is followed by 15 nops and no status check - presumably
    // the nops are sized to outlast one byte on the wire; confirm against the
    // SPI clock setting in use.
    PORTB = PORTB & B11111011;  //digitalWrite(SSpin, LOW);
    // monitor D10, confirm XXuS timing
    SPDR = (testArray[fakestartPoint + 0]);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;
    SPDR = (testArray[fakestartPoint + 1]);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;
    SPDR = (testArray[fakestartPoint + 2]);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;
    SPDR = (testArray[fakestartPoint + 3]);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;
    SPDR = (testArray[fakestartPoint + 4]);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;
    SPDR = (testArray[fakestartPoint + 5]);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;
    PORTB = PORTB | B00000100;  //digitalWrite(SSpin, HIGH);

Do you think doing the same for the SRAM would help?

Ok, testing is looking better:

Start recording
done from record memory
13253980
Start playback
done from playback memory
11735128

So readback looks like could be fast enough if I can find a fast DAC.
Writing still a little slow.
Entire code attached below.
Write loop:

  // somehow put this in an interrupt timer ISR that runs at 22.676uS when sped up enough
  while (recording == 1){  // stay here until memory is full, or end is pressed    
      // access ADC
      PORTB = PORTB & B11110111; // clear csADC
      adc2 = SPI.transfer(0); // MSB 0000 xxxx
      adc1 = SPI.transfer(0); //     xxxx xxxx
      adc0 = SPI.transfer(0); //     xxxx 0000 LSB
      PORTB = PORTB | B00001000; // set csADC
      // clean up into 2 bytes
      upperADC = (adc2 <<4) + (adc1 >> 4);
      lowerADC = (adc1 <<4) + (adc0 >> 4);

      // write to SRAM
      PORTC = SRAMbank; // clear SRAM chip select 0 to 7
      SPDR = (SRAMinst);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;
      SPDR = (SRAMaddress2);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;
      SPDR = (SRAMaddress1);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;
      SPDR = (SRAMaddress0);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;
      SPDR = (upperDAC);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;
      SPDR = (lowerDAC);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;
      PORTC = 0xFF; // set SRAM chip select 0 to 7
      // update for next address and/or bank
      SRAMaddress0 = SRAMaddress0+1;
      if (SRAMaddress0 == 0){
        SRAMaddress1 = SRAMaddress1 +1; // 
        if (SRAMaddress1 == 0){
          SRAMaddress2 = SRAMaddress2 + 1; // 
          if (SRAMaddress2 == 2){ //gone thru all addresses,rollover to next bank
            SRAMaddress2 = 0; // clear upper bit, shift the 0 for next cs bit
            SRAMbank = (SRAMbank <<1) + 0x01; // B11111110 -> B11111101, etc.
            if (SRAMbank == 0xFF){  // shifted a 0 across all outputs
            endTime = micros();
              recording = 0; // Done!
              Serial.println ("done from record memory");
              Serial.println (endTime - startTime);
              
              PORTA = PORTA & B11111110; // turn off status LED
            } // SRAM bank rollover
          } // upper address rollover
        } // middle address rollover
      } // lower address rollover
  } // while record/store loop

Read loop:

  // somehow put this in an interrupt timer ISR that runs at 22.676uS when sped up enough
  while (playing == 1){ // stay here until playback is done, or end is pressed                       
      // read SRAM
      PORTC = SRAMbank; // clear SRAM chip select 0 to 7
      SPDR = (SRAMinst);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;
      SPDR = (SRAMaddress2);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;
      SPDR = (SRAMaddress1);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;
      SPDR = (SRAMaddress0);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;      
      upperDAC = SPI.transfer (0);
      lowerDAC = SPI.transfer (0);
      PORTC = 0xFF; // set SRAM chip select 0 to 7

      // write data to DAC 000000xx xxxxxxxx xxxxxxxx
      // first 2 bits are 0 for normal operations
      PORTB = PORTB & B11111011; // clear csDAC
      SPDR = (0);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;
      SPDR = (upperDAC);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;
      SPDR = (lowerDAC);nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;nop; nop; nop;nop;  nop;
      PORTB = 0xFF; // clear csDAC
      // update for next address and/or bank
      SRAMaddress0 = SRAMaddress0+1;
      if (SRAMaddress0 == 0){
        SRAMaddress1 = SRAMaddress1 +1; 
        if (SRAMaddress1 == 0){
          SRAMaddress2 = SRAMaddress2 + 1; 
          if (SRAMaddress2 == 2){ // gone thru all addresses,rollover to next bank
            SRAMaddress2 = 0; // clear upper bit, shift the 0 for next cs bit
            SRAMbank = (SRAMbank <<1) + 0x01; // B11111110 -> B11111101, etc.
            if (SRAMbank == 0xFF){  // shifted a 0 across all outputs
            endTime = micros();
            playing = 0; // Done!
              Serial.println ("done from playback memory");
              Serial.println(endTime - startTime);
              PORTA = PORTA & B11111110; // turn off status LED
            } // SRAM bank rollover
          } // upper address rollover
        } // middle address rollover
      } // lower address rollover
  } // while read/play loop

sample_playback_store.ino (10.1 KB)

First, you really don't need the SRAM, you can just use a large buffer between the ISR that reads the ADC and the code in loop that writes the SD. I have logged data at up to 100 ksps from an 8 bit ADC and over 50 ksps for two bytes.

I looked at the time to read the AD7680 using my fast bit-bang I/O. The time is about 7-8 microseconds. Here is what the read function, readADC(), and its helpers look like.

//------------------------------------------------------------------------------
/** clock for don't care bits
 *
 * Emits one pulse on MCP_SAR_CLK_PIN (high, then low) without reading the
 * data line. The nop before each edge pads the pulse; presumably this keeps
 * the clock within the converter's limits - confirm against the data sheet.
 */
STATIC_ALWAYS_INLINE void fastDummy() {
  nop;
  fastDigitalWrite(MCP_SAR_CLK_PIN, 1);
  nop;
  fastDigitalWrite(MCP_SAR_CLK_PIN, 0);
}
//------------------------------------------------------------------------------
/** read next bit fast as possible
 *
 * Raises the clock, samples the data line while the clock is high, then
 * lowers the clock again.
 *
 * \param[in,out] v word to receive bit
 * \param[in] b bit number to be set.  v |= (1 << b) if next bit is high.
 */
STATIC_ALWAYS_INLINE void readBitFast16(uint16_t &v, uint8_t b) {
  fastDigitalWrite(MCP_SAR_CLK_PIN, 1);
  // Shift an unsigned 16-bit one: on AVR int is 16 bits wide, so a plain
  // (1 << 15) overflows the signed type.
  if (fastDigitalRead(MCP_SAR_DOUT_PIN)) v |= (static_cast<uint16_t>(1) << b);
  fastDigitalWrite(MCP_SAR_CLK_PIN, 0);
}
//------------------------------------------------------------------------------
/** Read AD7680 16-bit ADC in less than 8 microseconds
 *
 * Bit-bangs a 20-clock frame: chip select low, four clock pulses whose data
 * is ignored, sixteen data bits collected MSB first, chip select high.
 * The sixteen reads are written out one by one rather than looped so that
 * each call gets a constant bit number.
 *
 *  cs is chip select pin
 *  \return the 16-bit conversion result
 */
STATIC_ALWAYS_INLINE uint16_t readADC(uint8_t cs) {
  fastDigitalWrite(cs, 0);
  uint16_t v = 0;
  // Four clocks for the leading bits that carry no data.
  fastDummy();
  fastDummy();
  fastDummy();
  fastDummy();
  // Sixteen data bits, most significant first.
  readBitFast16(v,  15);
  readBitFast16(v,  14);
  readBitFast16(v,  13);
  readBitFast16(v,  12);
  readBitFast16(v,  11);
  readBitFast16(v,  10);
  readBitFast16(v,  9);
  readBitFast16(v,  8);
  readBitFast16(v,  7);
  readBitFast16(v,  6);
  readBitFast16(v,  5);
  readBitFast16(v,  4);
  readBitFast16(v,  3);
  readBitFast16(v,  2);
  readBitFast16(v,  1);
  readBitFast16(v,  0);
  fastDigitalWrite(cs, 1);
  return v;
}

That would leave plenty of time for the ISR context switch and writing data to the SD.

You use the SPI controller for the SD and three other pins for the ADC. This works well since the SPI controller continues to transfer data to the SD while you are servicing the ADC ISR. You actually get some I/O overlap.

You can play back directly from the SD card. Thousands of Adafruit users do this with the Wave Shield.

Ok, I am asking for programming help then. I don't know enough to tie all the pieces together.
The adafruit waveshield page says "It can play up to 22KHz, 12bit uncompressed audio files of any length."

How do I get from there to 44.1K, 16 bit uncompressed? I think I'm starting to get the concept: use the code above to interrupt every 7-8uS to get a sample, the code is free to run the rest of the time (22.676-8uS = say 15uS less any interrupt jumping back & forth time) to send data out to the SD via hardware-buffered SPI?
Looking at Nick's handy table here

I see 40K, I see 50K.
Hmm, these seem to be timer 2 also - Timer 0 & Timer 2 are 8-bit. Timer1 & Timer3 are 16-bit - can Timer3 be programmed for 22.676uS interrupts?
Deeper hmm - (1/44,100)/(1/16,000,000) = interrupt every 362.8 (363) clock cycles - so from Nick's page 363 >255, so 16 bit timer is needed.
I may get this yet!

[Some further enticing 1284 info:
1284P has 2 UARTs - maybe use that in SPI mode to get some breathing room too.
"The USART RX and TX control logic is replaced by a common SPI transfer control logic."
Common with the regular SPI port, or just between the 2 UARTs?

"16-bit data transfer can be achieved by writing two data bytes to UDRn. A UART transmit complete interrupt will then signal that the 16-bit value has been shifted out."
Good for sending to the DAC - 3 bytes come in from the ADC (or 20 bits if one stop there as you did) so that doesn't really help on the acquisition side.]

Anyway, off to bed - gonna be dragging a little tomorrow...

My library for the Adafruit Wave Shield can play 44.1 ksps 16-bit files. The board only has a 12 bit-DAC so I throw 4 bits away.

I would be happy to help with programming the core stuff like the timer ISR, buffering, bit-bang ADC read and DAC write.

I have all this code done for other ADC and DAC parts so it will only take a bit of modification. Your ADC and DAC are similar to others I have used.

I can use a Mega to check things and let you do real testing.

Here is more on how it would work. This is the ADC read ISR, the buffering, and the SD write function that is called from loop.

// Ring buffer shared between the ADC timer ISR (which fills it) and
// writeBuf() (which drains it).
// Buffer size in 512 byte blocks.  6 KB for Mega
const uint16_t BUF_BLOCK_COUNT = 12;
// Buffer size in 16-bit samples (256 samples = one 512 byte block).
const uint16_t BUF_SIZE = 256*BUF_BLOCK_COUNT;
// Sample buffer.
uint16_t buf[BUF_SIZE];
// Buffer in index: next slot the ISR writes. Only the ISR changes it.
volatile uint16_t bufIn = 0;
// Buffer out index: next slot writeBuf() sends. Only writeBuf() changes it.
volatile uint16_t bufOut = 0;
// Overflow error: set by the ISR when the buffer is full; never cleared here.
volatile bool bufOverflow = false;
//-----------------------------------------------------
// Timer 1 compare-B interrupt: take one ADC sample and queue it.
ISR(TIMER1_COMPB_vect) {
  // The new sample always lands in the current write slot.
  buf[bufIn] = readADC(ADC_CS_PIN);
  // Work out where the write index would move to, wrapping at the end.
  uint16_t next = bufIn + 1;
  if (next == BUF_SIZE) next = 0;
  // Publish the new index only when it does not run into the read index.
  if (next != bufOut) {
    bufIn = next;
    return;
  }
  // Buffer full: bufIn stays put, so this slot is reused by the next sample,
  // and the loss is flagged.
  bufOverflow = true;
}
//----------------------------------------------------------
// Function to write data in buffer to SD.
void writeBuf() {
  uint16_t n;
  uint16_t out;
  // capture volatile bufIn.
  noInterrupts();
  uint16_t in = bufIn;
  interrupts();
  if (bufOut > in) {
    // Write all data to end of buffer.
    n = 2*(BUF_SIZE - bufOut);
    out = 0;
  } 
  else {
    // Number of blocks to write.
    n = (bufIn - bufOut) >> 8;
    if (n == 0) return;
    n *= 256;
    out += n;
    n *= 2;
  }
  file.write(&buf[bufOut], n);
  noInterrupts();
  // New out.
  bufOut = out;
  interrupts();
}

The SD write function always writes a multiple of 512 bytes, and SdFat is very efficient in this mode.

Here are test results for 1024 byte I/O on a Mega.

Free RAM: 5737
Type is FAT16
File size 5MB
Buffer size 1024 bytes
Starting write test. Please wait up to a minute
Write 527.95 KB/sec
Maximum latency: 26808 usec, Minimum Latency: 1836 usec, Avg Latency: 1933 usec

Starting read test. Please wait up to a minute
Read 571.14 KB/sec
Maximum latency: 2700 usec, Minimum Latency: 1780 usec, Avg Latency: 1786 usec

The average time to write a byte is under 2 microseconds or 4 microseconds for a 16-bit sample. I think it should work.

This is using an industrial SLC SD card Open Box: Wintec 1GB Secure Digital (SD) Flash Card Model 33121354-I - Newegg.com.

I will put together a complete test sketch.