Here's my first attempt at 24 Mbit/sec speed in SdFat. It still needs work (block send isn't working at all), but even this makes a pretty substantial speedup.
//------------------------------------------------------------------------------
/**
 * Initialize hardware SPI as bus master.
 *
 * AVR register path: SCK rate is F_CPU/pow(2, 1 + spiRate) for spiRate [0,6].
 * Native MK20DX128 path: the clock setting is picked by the switch below;
 * every spiRate greater than 5 selects the slowest setting.
 *
 * \param[in] spiRate clock rate selector, 0 is the fastest.
 */
static void spiInit(uint8_t spiRate) {
#if defined(USE_NATIVE_MK20DX128) && 1
  // Set the SPI0 bit in the SIM clock gating register, then write
  // MDIS | HALT to MCR before CTAR0 is reprogrammed.
  SIM_SCGC6 |= SIM_SCGC6_SPI0;
  SPI0_MCR = SPI_MCR_MDIS | SPI_MCR_HALT;

  // Rates as listed by the original author (F_BUS = 48 MHz or 24 MHz):
  //   spiRate = 0 : 24 or 12 Mbit/sec
  //   spiRate = 1 : 12 or 6 Mbit/sec
  //   spiRate = 2 : 6 or 3 Mbit/sec
  //   spiRate = 3 : 3 or 1.5 Mbit/sec
  //   spiRate = 4 : 1.5 or 0.75 Mbit/sec
  //   spiRate = 5 : 250 kbit/sec
  //   spiRate = 6 : 125 kbit/sec
  // NOTE(review): the K20 BR scaler steps are believed to be 2, 4, 6, 8, 16,
  // 32, 64, ... which would make cases 3..6 come out as 4/2 Mbit/sec,
  // 3/1.5 Mbit/sec, 500 kbit/sec and 250 kbit/sec instead - confirm against
  // the CTAR register description before relying on this table.
  uint32_t clockBits;
  switch (spiRate) {
    case 0: clockBits = SPI_CTAR_DBR | SPI_CTAR_BR(0); break;
    case 1: clockBits = SPI_CTAR_BR(0); break;
    case 2: clockBits = SPI_CTAR_BR(1); break;
    case 3: clockBits = SPI_CTAR_BR(2); break;
    case 4: clockBits = SPI_CTAR_BR(3); break;
#if F_BUS == 48000000
    case 5: clockBits = SPI_CTAR_PBR(1) | SPI_CTAR_BR(5); break;
    default: clockBits = SPI_CTAR_PBR(1) | SPI_CTAR_BR(6);
#elif F_BUS == 24000000
    case 5: clockBits = SPI_CTAR_PBR(1) | SPI_CTAR_BR(4); break;
    default: clockBits = SPI_CTAR_PBR(1) | SPI_CTAR_BR(5);
#else
#error "MK20DX128 bus frequency must be 48 or 24 MHz"
#endif
  }

  // Every rate uses the same frame size field, FMSZ(7).
  SPI0_CTAR0 = SPI_CTAR_FMSZ(7) | clockBits;
  // Writing MSTR alone also drops the MDIS and HALT bits set above.
  SPI0_MCR = SPI_MCR_MSTR;

  // Select mux alternative 2 on pins 11, 12 and 13; DSE is set on 11 and 13 only.
  CORE_PIN11_CONFIG = PORT_PCR_DSE | PORT_PCR_MUX(2);
  CORE_PIN12_CONFIG = PORT_PCR_MUX(2);
  CORE_PIN13_CONFIG = PORT_PCR_DSE | PORT_PCR_MUX(2);
#else
  // See avr processor documentation
  SPCR = (1 << SPE) | (1 << MSTR) | (spiRate >> 1);
  // SPI2X is set only for even rates other than 6.
  if ((spiRate & 1) || spiRate == 6) {
    SPSR = 0;
  } else {
    SPSR = 1 << SPI2X;
  }
#endif
}
//------------------------------------------------------------------------------
/**
 * SPI receive a byte.
 *
 * Sends a single 0xFF and returns the byte read back from the SPI data
 * register once the transfer-complete flag is set.
 *
 * \return the received byte.
 */
static inline __attribute__((always_inline))
uint8_t spiRec() {
#if defined(USE_NATIVE_MK20DX128) && 1
  // CLR_RXF: discard anything already in the RX FIFO so the pop below
  // returns the frame produced by this transfer.
  SPI0_MCR = SPI_MCR_MSTR | SPI_MCR_CLR_RXF;
  // Write the TCF bit to SR before starting, then poll for it afterwards.
  SPI0_SR = SPI_SR_TCF;
  SPI0_PUSHR = 0xFF;
  while ((SPI0_SR & SPI_SR_TCF) == 0) {
    // busy-wait for transfer complete
  }
  return SPI0_POPR;
#else
  SPDR = 0XFF;
  while ((SPSR & (1 << SPIF)) == 0) {
    // busy-wait for SPIF
  }
  return SPDR;
#endif
}
//------------------------------------------------------------------------------
/**
 * SPI read data - only one call so force inline.
 *
 * Clocks out nbyte 0xFF bytes and stores the nbyte received bytes in buf.
 * Does nothing when nbyte is 0.
 *
 * \param[out] buf destination for the received bytes; must have room for
 *                 nbyte bytes.
 * \param[in] nbyte number of bytes to read.
 */
static inline __attribute__((always_inline))
void spiRead(uint8_t* buf, uint16_t nbyte) {
#if defined(USE_NATIVE_MK20DX128) && 1
  // CLR_RXF: start with an empty RX FIFO so every pop belongs to this read.
  SPI0_MCR = SPI_MCR_MSTR | SPI_MCR_CLR_RXF;
  uint32_t status, txcount = 0, rxcount = 0;

  // Keep the TX FIFO fed while draining the RX FIFO.
  // (status >> 12) & 15 is the TX FIFO entry count, (status >> 4) & 15 the
  // RX FIFO entry count.
  // NOTE(review): nothing here limits how far txcount may run ahead of
  // rxcount; if this loop is stalled (e.g. by an interrupt) the RX FIFO
  // could presumably overflow and lose a byte - confirm whether that matters.
  while (txcount < nbyte) {
    status = SPI0_SR;
    if (((status >> 12) & 15) < 4) {
      SPI0_PUSHR = 0xFF;
      txcount++;
    }
    if (((status >> 4) & 15) > 0) {
      *buf++ = SPI0_POPR;
      rxcount++;
    }
  }

  // Everything has been queued; collect the bytes still in flight.
  while (rxcount < nbyte) {
    // The status register must be re-read on every pass. Testing the stale
    // snapshot left over from the loop above either spins forever (snapshot
    // showed an empty RX FIFO) or pops entries that have not arrived yet.
    status = SPI0_SR;
    if (((status >> 4) & 15) > 0) {
      *buf++ = SPI0_POPR;
      rxcount++;
    }
  }
#else
  // After the decrement nbyte is the index of the last byte to store.
  if (nbyte-- == 0) return;
  SPDR = 0XFF;
  for (uint16_t i = 0; i < nbyte; i++) {
    while (!(SPSR & (1 << SPIF)));
    buf[i] = SPDR;
    // Start the next transfer right after the previous byte is stored.
    SPDR = 0XFF;
  }
  while (!(SPSR & (1 << SPIF)));
  buf[nbyte] = SPDR;
#endif
}
//------------------------------------------------------------------------------
/**
 * SPI send a byte.
 *
 * Writes b to the SPI transmit register and busy-waits until the
 * transfer-complete flag is set. The byte received during the transfer is
 * not read.
 *
 * \param[in] b byte to send.
 */
static inline __attribute__((always_inline))
void spiSend(uint8_t b) {
#if defined(USE_NATIVE_MK20DX128) && 1
  // Write the TCF bit to SR before starting, then poll for it afterwards.
  SPI0_SR = SPI_SR_TCF;
  SPI0_PUSHR = b;
  while ((SPI0_SR & SPI_SR_TCF) == 0) {
    // busy-wait for transfer complete
  }
#else
  SPDR = b;
  while ((SPSR & (1 << SPIF)) == 0) {
    // busy-wait for SPIF
  }
#endif
}
//------------------------------------------------------------------------------
/**
 * SPI send block - only one call so force inline.
 *
 * Sends token followed by 512 bytes taken from buf and returns after the
 * last byte has been transferred.
 *
 * \param[in] token byte sent ahead of the data.
 * \param[in] buf source data; must point to at least 512 readable bytes.
 */
static inline __attribute__((always_inline))
void spiSendBlock(uint8_t token, const uint8_t* buf) {
// Native FIFO path, still switched off (&& 0): the version below is a rewrite
// that has NOT been run on hardware yet. Flip the 0 to 1 only after testing.
//
// Likely reason the earlier attempt failed: TCF was cleared once before the
// first push and is set again as soon as the first frame completes, so the
// final "wait for TCF" passed immediately while the last frame was still being
// shifted out, and the 513 received frames were never read back. Later
// spiSend()/spiRec() calls then saw a completion flag and RX data that belonged
// to earlier frames. NOTE(review): diagnosis from reading the code, not from a
// hardware trace - confirm.
#if defined(USE_NATIVE_MK20DX128) && 0
  // CLR_RXF: start with an empty RX FIFO so every pop counted below belongs
  // to this block.
  SPI0_MCR = SPI_MCR_MSTR | SPI_MCR_CLR_RXF;
  SPI0_PUSHR = token;
  // Frames pushed / frames popped so far; the token is frame number one, so
  // the block is complete after 1 + 512 = 513 frames.
  uint32_t status, txcount = 1, rxcount = 0;
  while (txcount < 513) {
    status = SPI0_SR;
    // Push only while the TX FIFO has room AND fewer than 4 frames are
    // unaccounted for, so unread frames can never exceed 4 (assumes a
    // 4-entry RX FIFO - TODO confirm for this part).
    if (((status >> 12) & 15) < 4 && (txcount - rxcount) < 4) {
      SPI0_PUSHR = *buf++;
      txcount++;
    }
    if (((status >> 4) & 15) > 0) {
      uint32_t discard = SPI0_POPR;  // received data is not needed
      (void)discard;
      rxcount++;
    }
  }
  // One received frame per transmitted frame: once all 513 have been popped
  // the last byte has fully left the shifter and the RX FIFO is empty.
  while (rxcount < 513) {
    status = SPI0_SR;
    if (((status >> 4) & 15) > 0) {
      uint32_t discard = SPI0_POPR;
      (void)discard;
      rxcount++;
    }
  }
#else
  SPDR = token;
  // Two bytes per pass: wait for the previous transfer, then start the next.
  for (uint16_t i = 0; i < 512; i += 2) {
    while (!(SPSR & (1 << SPIF)));
    SPDR = buf[i];
    while (!(SPSR & (1 << SPIF)));
    SPDR = buf[i + 1];
  }
  // Wait for the final byte to finish before returning.
  while (!(SPSR & (1 << SPIF)));
#endif
}