Generating 10 sine waves from 10 PWM outputs on an Arduino Mega

I want to generate 10 sine waves (audio frequency range, mainly 20~8000 Hz) simultaneously from 10 PWM output pins. My board is an Arduino Mega 2560.
I am doing this project with the attached code. Pins 2, 5, 9, 10, 11, and 12 work as expected (timers 1, 2, 3). However, pins 6, 7, 45, and 46 (timers 4, 5) are not working. Maybe I missed something in using timers 4 and 5.

Thanks in advance. Greetings.

sine_ten.ino (4.85 KB)

OP’s code in code tags:

#include "avr/pgmspace.h"
#include "arduino.h"

PROGMEM  const int sine256[]  = {
127,130,133,136,139,143,146,149,152,155,158,161,164,167,170,173,176,178,181,184,187,190,192,195,198,200,203,205,208,210,212,215,217,219,221,223,225,227,229,231,233,234,236,238,239,240,242,243,244,245,247,248,249,249,250,251,252,252,253,253,253,254,254,254,254,254,254,254,253,253,253,252,252,251,250,249,249,248,247,245,244,243,242,240,239,238,236,234,233,231,229,227,225,223,221,219,217,215,212,210,208,205,203,200,198,195,192,190,187,184,181,178,176,173,170,167,164,161,158,155,152,149,146,143,139,136,133,130,127,124,121,118,115,111,108,105,102,99,96,93,90,87,84,81,78,76,73,70,67,64,62,59,56,54,51,49,46,44,42,39,37,35,33,31,29,27,25,23,21,20,18,16,15,14,12,11,10,9,7,6,5,5,4,3,2,2,1,1,1,0,0,0,0,0,0,0,1,1,1,2,2,3,4,5,5,6,7,9,10,11,12,14,15,16,18,20,21,23,25,27,29,31,33,35,37,39,42,44,46,49,51,54,56,59,62,64,67,70,73,76,78,81,84,87,90,93,96,99,102,105,108,111,115,118,121,124
};

// Clear (cbi) or set (sbi) a single bit in a special-function register.
// `sfr` is the register name and `bit` is the bit number within it.
// _SFR_BYTE and _BV are not defined in this sketch; they are expected to
// come from the AVR headers.
#define cbi(sfr, bit) (_SFR_BYTE(sfr) &= ~_BV(bit))
#define sbi(sfr, bit) (_SFR_BYTE(sfr) |= _BV(bit))

// Output frequency in Hz; setup() turns it into the per-timer tuning words.
double dfreq;
// Reference rate in Hz used to scale the tuning words:
// tuning word = 2^32 * dfreq / refclk.
const double refclk = 31376.6;

// Per-timer state, all shared with the interrupt handlers:
//   phaccuN  - phase accumulator
//   tword_mN - tuning word added to the accumulator on every overflow
//   icntN    - table index (top 8 bits of the accumulator)
//   icntN1   - overflow sub-counter (wraps after 125)
//   c4msN    - incremented each time the sub-counter wraps

// Timer 1 -> pins 11, 12
volatile unsigned long phaccu1, tword_m1;
volatile byte icnt1, icnt11, c4ms1;

// Timer 2 -> pins 10, 9
volatile unsigned long phaccu2, tword_m2;
volatile byte icnt2, icnt21, c4ms2;

// Timer 3 -> pins 5, 2, 3
volatile unsigned long phaccu3, tword_m3;
volatile byte icnt3, icnt31, c4ms3;

// Timer 4 -> pins 6, 7, 8
volatile unsigned long phaccu4, tword_m4;
volatile byte icnt4, icnt41, c4ms4;

// Timer 5 -> pins 46, 45, 44
volatile unsigned long phaccu5, tword_m5;
volatile byte icnt5, icnt51, c4ms5;

void setup()
{
  pinMode(2, OUTPUT);
  pinMode(5, OUTPUT);
  pinMode(6, OUTPUT);
  pinMode(7, OUTPUT);
  pinMode(9, OUTPUT);
  pinMode(10, OUTPUT);
  pinMode(11, OUTPUT);
  pinMode(12, OUTPUT);
  pinMode(45, OUTPUT);
  pinMode(46, OUTPUT);
  Setup_timer();

  cbi (TIMSK0, TOIE0);
  sbi (TIMSK1, TOIE1);
  sbi (TIMSK2, TOIE2);
  sbi (TIMSK3, TOIE3);
  sbi (TIMSK4, TOIE4);
  sbi (TIMSK5, TOIE5);

  dfreq = 2000.0;
  tword_m1 = pow(2, 32) * dfreq / refclk;
  tword_m2 = pow(2, 32) * dfreq / refclk;
  tword_m3 = pow(2, 32) * dfreq / refclk;
  tword_m4 = pow(2, 32) * dfreq / refclk;
  tword_m5 = pow(2, 32) * dfreq / refclk;

}


// Arduino main loop. Empty: everything in the visible sketch happens in
// setup() and in the timer overflow interrupt handlers.
void loop()
{

}


void Setup_timer() {

  TCCR1B = (TCCR1B & 0b11100000) | 0b00000001;
  TCCR2B = (TCCR2B & 0b11110000) | 0b00000001;
  TCCR3B = (TCCR3B & 0b11100000) | 0b00000001; 
  TCCR4B = (TCCR4B & 0b11100000) | 0b00000001;
  TCCR5B = (TCCR5B & 0b11100000) | 0b00000001;
//  sbi (TCCR4B, CS40);
//  cbi (TCCR4B, CS41);
//  cbi (TCCR4B, CS42);

  TCCR1A = (TCCR1A & 0b00001100) | 0b10100001;
  TCCR2A = (TCCR2A & 0b00001100) | 0b10100001;
  TCCR3A = (TCCR3A & 0b00001100) | 0b10100001;
  TCCR4A = (TCCR4A & 0b00001100) | 0b10100001;
  TCCR5A = (TCCR5A & 0b00001100) | 0b10100001;

//  cbi (TCCR4A, COM4A0);  // clear Compare Match
//  sbi (TCCR4A, COM4A1);

  TIMSK1 |= (1 << TOIE1);
  TIMSK2 |= (1 << TOIE2);
  TIMSK3 |= (1 << TOIE3);
  TIMSK4 |= (1 << TOIE4);
  TIMSK5 |= (1 << TOIE5);
//  sbi (TCCR4A, WGM40);  // Mode 1  / Phase Correct PWM
//  cbi (TCCR4A, WGM41);
//  cbi (TCCR4B, WGM42);

}



// Timer 1 overflow interrupt: advances the timer-1 phase accumulator and
// loads the next table sample into both timer-1 compare registers.
ISR(TIMER1_OVF_vect) {

  // PORTD bit 7 is set for the duration of the handler
  // (presumably a scope timing marker -- confirm).
  sbi(PORTD, 7);
  // Add the tuning word; the top 8 bits of the accumulator are the table index.
  phaccu1 = phaccu1 + tword_m1;
  icnt1 = phaccu1 >> 24;

  // Both outputs of this timer get the same sample, read from program memory.
  OCR1A = pgm_read_byte_near(sine256 + icnt1);
  OCR1B = pgm_read_byte_near(sine256 + icnt1);

  // icnt11 counts 0..125, so c4ms1 advances once every 126 overflows.
  // c4ms1 is not read anywhere in the visible sketch.
  if (icnt11++ == 125) {
    c4ms1++;
    icnt11 = 0;
  }

  cbi(PORTD, 7);
}



// Timer 2 overflow interrupt: advances the timer-2 phase accumulator and
// loads the next table sample into both timer-2 compare registers.
ISR(TIMER2_OVF_vect) {

  // PORTD bit 7 is set for the duration of the handler
  // (presumably a scope timing marker -- confirm).
  sbi(PORTD, 7);

  // Add the tuning word; the top 8 bits of the accumulator are the table index.
  phaccu2 = phaccu2 + tword_m2;
  icnt2 = phaccu2 >> 24;

  // Both outputs of this timer get the same sample, read from program memory.
  OCR2A = pgm_read_byte_near(sine256 + icnt2);
  OCR2B = pgm_read_byte_near(sine256 + icnt2);

  // icnt21 counts 0..125, so c4ms2 advances once every 126 overflows.
  // c4ms2 is not read anywhere in the visible sketch.
  if (icnt21++ == 125) {
    c4ms2++;
    icnt21 = 0;
  }

  cbi(PORTD, 7);
}


// Timer 3 overflow interrupt: advances the timer-3 phase accumulator and
// loads the next table sample into both timer-3 compare registers.
ISR(TIMER3_OVF_vect) {

  // PORTD bit 7 is set for the duration of the handler
  // (presumably a scope timing marker -- confirm).
  sbi(PORTD, 7);

  // Add the tuning word; the top 8 bits of the accumulator are the table index.
  phaccu3 = phaccu3 + tword_m3;
  icnt3 = phaccu3 >> 24;

  // Both outputs of this timer get the same sample, read from program memory.
  OCR3A = pgm_read_byte_near(sine256 + icnt3);
  OCR3B = pgm_read_byte_near(sine256 + icnt3);

  // icnt31 counts 0..125, so c4ms3 advances once every 126 overflows.
  // c4ms3 is not read anywhere in the visible sketch.
  if (icnt31++ == 125) {
    c4ms3++;
    icnt31 = 0;
  }

  cbi(PORTD, 7);
}


// Timer 4 overflow interrupt: advances the timer-4 phase accumulator and
// loads the next table sample into both timer-4 compare registers.
ISR(TIMER4_OVF_vect) {

  // PORTD bit 7 is set for the duration of the handler
  // (presumably a scope timing marker -- confirm).
  sbi(PORTD, 7);

  // Add the tuning word; the top 8 bits of the accumulator are the table index.
  phaccu4 = phaccu4 + tword_m4;
  icnt4 = phaccu4 >> 24;

  // Both outputs of this timer get the same sample, read from program memory.
  OCR4A = pgm_read_byte_near(sine256 + icnt4);
  OCR4B = pgm_read_byte_near(sine256 + icnt4);
 
  // icnt41 counts 0..125, so c4ms4 advances once every 126 overflows.
  // c4ms4 is not read anywhere in the visible sketch.
  if (icnt41++ == 125) {
    c4ms4++;
    icnt41 = 0;
  }

  cbi(PORTD, 7);
}


// Timer 5 overflow interrupt: advances the timer-5 phase accumulator and
// loads the next table sample into both timer-5 compare registers.
ISR(TIMER5_OVF_vect) {

  // PORTD bit 7 is set for the duration of the handler
  // (presumably a scope timing marker -- confirm).
  sbi(PORTD, 7);

  // Add the tuning word; the top 8 bits of the accumulator are the table index.
  phaccu5 = phaccu5 + tword_m5;
  icnt5 = phaccu5 >> 24;

  // Both outputs of this timer get the same sample, read from program memory.
  OCR5A = pgm_read_byte_near(sine256 + icnt5);
  OCR5B = pgm_read_byte_near(sine256 + icnt5);
 
  // icnt51 counts 0..125, so c4ms5 advances once every 126 overflows.
  // c4ms5 is not read anywhere in the visible sketch.
  if (icnt51++ == 125) {
    c4ms5++;
    icnt51 = 0;
  }

  cbi(PORTD, 7);
}

On a quick inspection, my first thought is to synchronize the timers and then use a single ISR: much less load on the processor, as entering/leaving an ISR is expensive.

My second thought is why use 10 output pins? Do the tones have to be independent? You can combine
tones in software trivially and drive a single pin with the result.

MarkT:
On a quick inspection, my first thought is to synchronize the timers and then use a single ISR: much less load on the processor, as entering/leaving an ISR is expensive.

My second thought is why use 10 output pins? Do the tones have to be independent? You can combine
tones in software trivially and drive a single pin with the result.

Thank you very much for putting my code in code tags (I just forgot to) and for replying to me.

I need the ten sine waves to be independent, and each of them can be modulated separately, so I use ten pins.

I don't know how to use one ISR to achieve this. Can you give me some advice? Thank you in advance!

Can they not be modulated in software though?

MarkT:
Can they not be modulated in software though?

I will connect ten speakers to the ten pins, so I used five timers (timers 1-5) driving ten pins.

The code compiles successfully, but the audio signals coming from pins 6, 7, 45, and 46 didn’t work as expected.

Have you looked at the signals with an oscilloscope? How close to a sine wave are the outputs?

CrossRoads:
Have you looked at the signals with an oscilloscope? How close to a sine wave are the outputs?

Yes, I used an oscilloscope. The signals coming from pins 2, 5, 9, 10, 11, and 12 are exactly sine waves at the designed frequency. But the signals coming from pins 6 and 7 are disordered, and there is no signal on pins 45 and 46.

I had a play with your code after freeing up a Mega. The basic problem was that the ISRs together try to use
more than 100% of the processor time, and the last one (being lowest priority) doesn’t get a look-in.

The overheads for entering and exiting an ISR are large, so it makes sense to use just one ISR to drive all the
timers. So I amalgamated them all into the timer1 overflow ISR, synchronized the timers as best I could, and now it
basically works. I also took out all the PROGMEM (flash) accesses in case they were too slow, moving sine256 to RAM.

Here’s the result:

#include "avr/pgmspace.h"
#include "arduino.h"

// One full sine period, 256 samples, scaled to 0..254 (midpoint 127).
// Held in RAM and indexed by the top 8 bits of a phase accumulator.
// Every value fits in 8 bits, so the elements are bytes: the table takes
// 256 bytes of RAM instead of 512 and each lookup in the interrupt handler
// is a single-byte load.
const unsigned char sine256[]  = {
127,130,133,136,139,143,146,149,152,155,158,161,164,167,170,173,176,178,181,184,187,190,192,195,198,200,203,205,208,210,212,215,217,219,221,223,225,227,229,231,233,234,236,238,239,240,242,243,244,245,247,248,249,249,250,251,252,252,253,253,253,254,254,254,254,254,254,254,253,253,253,252,252,251,250,249,249,248,247,245,244,243,242,240,239,238,236,234,233,231,229,227,225,223,221,219,217,215,212,210,208,205,203,200,198,195,192,190,187,184,181,178,176,173,170,167,164,161,158,155,152,149,146,143,139,136,133,130,127,124,121,118,115,111,108,105,102,99,96,93,90,87,84,81,78,76,73,70,67,64,62,59,56,54,51,49,46,44,42,39,37,35,33,31,29,27,25,23,21,20,18,16,15,14,12,11,10,9,7,6,5,5,4,3,2,2,1,1,1,0,0,0,0,0,0,0,1,1,1,2,2,3,4,5,5,6,7,9,10,11,12,14,15,16,18,20,21,23,25,27,29,31,33,35,37,39,42,44,46,49,51,54,56,59,62,64,67,70,73,76,78,81,84,87,90,93,96,99,102,105,108,111,115,118,121,124
};

// Clear (cbi) or set (sbi) a single bit in a special-function register.
// `sfr` is the register name and `bit` is the bit number within it.
// _SFR_BYTE and _BV are not defined in this sketch; they are expected to
// come from the AVR headers.
#define cbi(sfr, bit) (_SFR_BYTE(sfr) &= ~_BV(bit))
#define sbi(sfr, bit) (_SFR_BYTE(sfr) |= _BV(bit))

// Output frequency in Hz; setup() turns it into the per-timer tuning words.
double dfreq;
// Reference rate in Hz used to scale the tuning words:
// tuning word = 2^32 * dfreq / refclk.
const double refclk = 31376.6;

// Per-timer state, all shared with the interrupt handler:
//   phaccuN  - phase accumulator
//   tword_mN - tuning word added to the accumulator on every overflow
//   icntN    - table index (top 8 bits of the accumulator)
//   icntN1   - overflow sub-counter (wraps after 125)
//   c4msN    - incremented each time the sub-counter wraps

// Timer 1 -> pins 11, 12
volatile unsigned long phaccu1, tword_m1;
volatile byte icnt1, icnt11, c4ms1;

// Timer 2 -> pins 10, 9
volatile unsigned long phaccu2, tword_m2;
volatile byte icnt2, icnt21, c4ms2;

// Timer 3 -> pins 5, 2, 3
volatile unsigned long phaccu3, tword_m3;
volatile byte icnt3, icnt31, c4ms3;

// Timer 4 -> pins 6, 7, 8
volatile unsigned long phaccu4, tword_m4;
volatile byte icnt4, icnt41, c4ms4;

// Timer 5 -> pins 46, 45, 44
volatile unsigned long phaccu5, tword_m5;
volatile byte icnt5, icnt51, c4ms5;

void setup()
{
  pinMode(2, OUTPUT);
  pinMode(5, OUTPUT);
  pinMode(6, OUTPUT);
  pinMode(7, OUTPUT);
  pinMode(9, OUTPUT);
  pinMode(10, OUTPUT);
  pinMode(11, OUTPUT);
  pinMode(12, OUTPUT);
  pinMode(45, OUTPUT);
  pinMode(46, OUTPUT);
  pinMode (38, OUTPUT) ;

  dfreq = 2000.0;
  tword_m1 = pow(2, 32) * dfreq / refclk;
  tword_m2 = pow(2, 32) * dfreq / refclk;
  tword_m3 = pow(2, 32) * dfreq / refclk;
  tword_m4 = pow(2, 32) * dfreq / refclk;
  tword_m5 = pow(2, 32) * dfreq / refclk;

  noInterrupts () ;
  Setup_timer();
  cbi (TIMSK0, TOIE0);
  interrupts () ;
}


// Arduino main loop. Empty: everything in the visible sketch happens in
// setup() and in the timer-1 overflow interrupt handler.
void loop()
{

}


// Configure timers 1-5 identically, preset their counters, and enable only
// the timer-1 overflow interrupt. setup() calls this with interrupts disabled.
void Setup_timer() {

  // TCCRnB: keep the upper bits, clear the rest, then set bit 0. Timer 2
  // uses a mask that keeps one more bit. The commented-out lines below give
  // the bit-by-bit equivalent for timer 4 (CS40 = 1, CS41 = 0, CS42 = 0).
  TCCR1B = (TCCR1B & 0b11100000) | 0b00000001;
  TCCR2B = (TCCR2B & 0b11110000) | 0b00000001;
  TCCR3B = (TCCR3B & 0b11100000) | 0b00000001;
  TCCR4B = (TCCR4B & 0b11100000) | 0b00000001;
  TCCR5B = (TCCR5B & 0b11100000) | 0b00000001;
//  sbi (TCCR4B, CS40);
//  cbi (TCCR4B, CS41);
//  cbi (TCCR4B, CS42);

  // TCCRnA: keep bits 3:2, clear the rest, then set bits 7, 5 and 0. The
  // commented-out lines below describe this as clear-on-compare-match
  // outputs in mode 1 (phase-correct PWM).
  TCCR1A = (TCCR1A & 0b00001100) | 0b10100001;
  TCCR2A = (TCCR2A & 0b00001100) | 0b10100001;
  TCCR3A = (TCCR3A & 0b00001100) | 0b10100001;
  TCCR4A = (TCCR4A & 0b00001100) | 0b10100001;
  TCCR5A = (TCCR5A & 0b00001100) | 0b10100001;

//  cbi (TCCR4A, COM4A0);  // clear Compare Match
//  sbi (TCCR4A, COM4A1);

//  sbi (TCCR4A, WGM40);  // Mode 1  / Phase Correct PWM
//  cbi (TCCR4A, WGM41);
//  cbi (TCCR4B, WGM42);

  // Preset the counters in steps of 5. NOTE(review): the offsets presumably
  // allow for the time that passes between successive writes; the values
  // depend on the code the compiler generates, so confirm on hardware.
  TCNT1 = 0 ; // synch counters, sort of.
  TCNT2 = 5 ;
  TCNT3 = 10 ;
  TCNT4 = 15 ;
  TCNT5 = 20 ;

  // Only timer 1 raises an overflow interrupt; its handler services all five timers.
  TIMSK1 |= (1 << TOIE1);  
}



// Timer 1 overflow interrupt: the single handler that services all five
// generators. It advances each phase accumulator, derives the table index
// from its top 8 bits, and loads both compare registers of each timer.
ISR(TIMER1_OVF_vect) {
  // Writes a 1 to bit 7 of PIND on entry and again on exit.
  // NOTE(review): on this AVR family writing 1 to a PINx bit toggles the
  // matching PORTx bit, so this looks like a scope timing marker -- confirm
  // against the datasheet.
  PIND = 0x80 ;

  // Advance every phase accumulator by its tuning word.
  phaccu1 += tword_m1;
  phaccu2 += tword_m2;
  phaccu3 += tword_m3;
  phaccu4 += tword_m4;
  phaccu5 += tword_m5;
  
  // Top 8 bits of each 32-bit accumulator select the table entry.
  icnt1 = phaccu1 >> 24;
  icnt2 = phaccu2 >> 24;
  icnt3 = phaccu3 >> 24;
  icnt4 = phaccu4 >> 24; 
  icnt5 = phaccu5 >> 24;

  // Both outputs of each timer get the same sample, read from the RAM table.
  OCR1A = sine256 [icnt1];
  OCR1B = sine256 [icnt1];
  OCR2A = sine256 [icnt2];
  OCR2B = sine256 [icnt2];
  OCR3A = sine256 [icnt3];
  OCR3B = sine256 [icnt3];
  OCR4A = sine256 [icnt4];
  OCR4B = sine256 [icnt4];
  OCR5A = sine256 [icnt5];
  OCR5B = sine256 [icnt5];
 
  // Each icntN1 counts 0..125, so the matching c4msN advances once every
  // 126 calls. The c4msN counters are not read anywhere in the visible sketch.
  if (icnt11++ == 125) {
    c4ms1++;
    icnt11 = 0;
  }
  if (icnt21++ == 125) {
    c4ms2++;
    icnt21 = 0;
  }
  if (icnt31++ == 125) {
    c4ms3++;
    icnt31 = 0;
  }
  if (icnt41++ == 125) {
    c4ms4++;
    icnt41 = 0;
  }
  if (icnt51++ == 125) {
    c4ms5++;
    icnt51 = 0;
  }

  PIND = 0x80 ;
}

MarkT:
I had a play with your code after freeing up a Mega. The basic problem was that the ISRs together try to use
more than 100% of the processor time, and the last one (being lowest priority) doesn’t get a look-in.

The overheads for entering and exiting an ISR are large, so it makes sense to use just one ISR to drive all the
timers. So I amalgamated them all into the timer1 overflow ISR, synchronized the timers as best I could, and now it
basically works. I also took out all the PROGMEM (flash) accesses in case they were too slow, moving sine256 to RAM.

Thank you very much for helping me and pointing out the problems in my code. I tested your code on my board and it’s exactly the result I want. Exciting!
Thank you! Sincerely!

Well, now there are only ~31000 ISR calls a second rather than attempting and failing to make ~157000 ISR calls/second.

the overhead for calling an ISR is many microseconds, as all the registers are dumped and saved and a new
stack frame built and junked.

The synchronization can be improved BTW - currently the indexing of the wavetable is interspersed with the writes to the OCRnx registers - this could cause late updates of some of the timers; it is worth checking that they all get
their values at safe points in the PWM cycle (gory details in the datasheet, alas).

The matching of the TCNTn registers would be safer to do in assembler, as it's currently at the mercy of the
compiler's optimization and a bit of a guess - it should be possible to match them precisely; they then remain in
lockstep forever if the timers are all in the same mode.

MarkT:
Well, now there are only ~31000 ISR calls a second rather than attempting and failing to make ~157000 ISR calls/second.

the overhead for calling an ISR is many microseconds, as all the registers are dumped and saved and a new
stack frame built and junked.

Thanks for your advice. If I move this from the Arduino Mega to an Arduino Due board, would this problem be solved automatically, or would it remain unchanged?

Same issue, ISRs take more cycles on a processor with more registers as there's more state to save and restore,
expecting 150000 interrupts a second to work on any machine is asking a lot.

You may even be able to use DMA to drive a PWM pin on the Due; it's very sophisticated, but you will have to set
aside several days for reading the datasheet to figure it out (not a joke).