[Solved] About HSEM on PortentaH7 for fast (and safe) inter core communication

Hi, has anyone used the HSEM (hardware semaphore) function on the Portenta in order to lock a section of code shared between the cores? I am trying to use it and I am running into a few problems, so here is my first attempt, using interrupts:

// Pin number of the indicator LED; assigned in setup() of whichever core build is compiled.
int myLED;
#ifdef CORE_CM7
#define HSEM_ID 0       // index of the hardware semaphore used as the inter-core lock
#define HSEM_PROCESS 0  // process ID passed to HAL_HSEM_Take() / HAL_HSEM_Release()

// Both variables are written inside HSEM1_IRQHandler() and read from loop().
// They are volatile so the compiler re-reads them on every access; without it
// the empty `while (status_semaphore == 0)` wait may be compiled into a loop
// that never observes the handler's write.
volatile int numit = 0;                 // number of HSEM interrupts serviced so far
volatile uint8_t status_semaphore = 0;  // 0 = lock taken / waiting, 1 = handler has run
/**
 * One-time initialisation of the CM7 build.
 *
 * Calls bootM4(), configures the green LED as an output, switches on the HSEM
 * clock-enable bit, turns on the notification for semaphore HSEM_ID and
 * registers HSEM1_IRQHandler as the vector for HSEM1_IRQn. The IRQ line itself
 * is not enabled here; loop() enables it only while waiting for the lock.
 */
void setup() {
  bootM4();  // presumably starts the M4 core - confirm against the board support package

  // Indicator LED.
  myLED = LEDG;
  pinMode(myLED, OUTPUT);

  // HSEM peripheral: clock-enable bit first, then the per-semaphore notification.
  SET_BIT(RCC->AHB4ENR, RCC_AHB4ENR_HSEMEN_Msk);
  HAL_HSEM_ActivateNotification(__HAL_HSEM_SEMID_TO_MASK(HSEM_ID));

  // Route the HSEM1 interrupt to the handler defined in this sketch.
  NVIC_SetVector(HSEM1_IRQn, (uint32_t)&HSEM1_IRQHandler);
}
/**
 * CM7 main loop: try to take the semaphore.
 *
 * On success the LED is driven LOW for 2 s, then HIGH, and the semaphore is
 * released. On failure the HSEM interrupt is enabled and the core spins until
 * the handler sets status_semaphore. Either way the loop then waits 1 s and
 * prints the interrupt counter.
 */
void loop() {
  const bool lock_acquired = (HAL_HSEM_Take(HSEM_ID, HSEM_PROCESS) == HAL_OK);

  if (!lock_acquired) {
    // Lock is held elsewhere: let the release notification wake us up.
    NVIC_EnableIRQ(HSEM1_IRQn);
    while (status_semaphore == 0) {
    }
  } else {
    status_semaphore = 0;  // mark the lock as held
    NVIC_DisableIRQ(HSEM1_IRQn);

    digitalWrite(myLED, LOW);
    delay(2000);
    digitalWrite(myLED, HIGH);

    HAL_HSEM_Release(HSEM_ID, HSEM_PROCESS);
    delay(1000);
  }

  delay(1000);
  Serial.println(numit);
}

/**
 * HSEM interrupt handler for the CM7 build (registered on HSEM1_IRQn in setup()).
 *
 * Counts the interrupt, signals loop() through status_semaphore, and
 * acknowledges the pending flag of semaphore 0 so the interrupt does not fire
 * again as soon as the handler returns.
 */
void HSEM1_IRQHandler(void) {
  numit = numit + 1;
  status_semaphore = 1;

  // Test the semaphore-0 bit of the status register. Masking with 0 can never
  // be true, which would leave the flag pending and re-enter this handler
  // endlessly.
  // NOTE(review): the ISC0 mask is reused for the status test on the assumption
  // that semaphore 0 occupies the same bit position in C1ISR and C1ICR - confirm
  // against the HSEM register map.
  if (HSEM->C1ISR & HSEM_C1ICR_ISC0) {
    HSEM->C1ICR = HSEM_C1ICR_ISC0;       // clear the pending flag
    HSEM_COMMON->ICR = HSEM_C1ICR_ISC0;  // same clear through the HSEM_COMMON view
  }
}
#endif

#ifdef CORE_CM4
#define HSEM_ID 0       // index of the hardware semaphore used as the inter-core lock
#define HSEM_PROCESS 0  // process ID passed to HAL_HSEM_Take() / HAL_HSEM_Release()

// Written inside HSEM2_IRQHandler() and polled from loop(); volatile so that
// the empty `while (status_semaphore == 0)` wait re-reads it on every pass
// instead of being compiled into a loop that never sees the handler's write.
volatile uint8_t status_semaphore = 0;  // 0 = lock taken / waiting, 1 = handler has run
int calc = 0;                           // not referenced by the code shown in this sketch
/**
 * One-time initialisation of the CM4 build.
 *
 * Configures the green LED as an output, registers HSEM2_IRQHandler as the
 * vector for HSEM2_IRQn and turns on the notification for semaphore HSEM_ID.
 * The HSEM clock is not enabled from this core (that line is disabled), and
 * the IRQ line is only enabled later, from loop().
 */
void setup() {
  // Indicator LED.
  myLED = LEDG;
  pinMode(myLED, OUTPUT);

  // Disabled: SET_BIT(RCC_C2->AHB4ENR, RCC_AHB4ENR_HSEMEN_Msk);

  // Handler first, then the per-semaphore notification.
  NVIC_SetVector(HSEM2_IRQn, (uint32_t)&HSEM2_IRQHandler);
  HAL_HSEM_ActivateNotification(__HAL_HSEM_SEMID_TO_MASK(HSEM_ID));

  // Disabled: SET_BIT(HSEM->C2IER, HSEM_C2IER_ISE0_Msk);
}

/**
 * CM4 main loop: try to take the semaphore.
 *
 * If the lock is busy, the HSEM interrupt is enabled, the red LED is driven
 * LOW after 1 s, and the core spins until the handler sets status_semaphore.
 * If the lock is obtained, the LED is driven LOW for 0.5 s, then HIGH, the
 * semaphore is released and the loop pauses 1 s.
 */
void loop() {
  if (HAL_HSEM_Take(HSEM_ID, HSEM_PROCESS) != HAL_OK) {
    // Lock is held elsewhere: wait for the release notification.
    NVIC_EnableIRQ(HSEM2_IRQn);
    delay(1000);
    digitalWrite(LEDR, LOW);
    while (status_semaphore == 0) {
    }
    return;
  }

  status_semaphore = 0;  // mark the lock as held
  NVIC_DisableIRQ(HSEM2_IRQn);

  digitalWrite(myLED, LOW);
  delay(500);
  digitalWrite(myLED, HIGH);

  HAL_HSEM_Release(HSEM_ID, HSEM_PROCESS);
  delay(1000);
}
/**
 * HSEM interrupt handler for the CM4 build (registered on HSEM2_IRQn in setup()).
 *
 * Signals loop() through status_semaphore and acknowledges the pending flag of
 * semaphore 0 so the interrupt does not fire again as soon as the handler
 * returns.
 */
void HSEM2_IRQHandler() {
  status_semaphore = 1;

  // Test the semaphore-0 bit of the status register. Masking with 0 can never
  // be true, which would leave the flag pending and keep the core inside this
  // handler.
  // NOTE(review): the ISC0 mask is reused for the status test on the assumption
  // that semaphore 0 occupies the same bit position in C2ISR and C2ICR - confirm
  // against the HSEM register map.
  if (HSEM->C2ISR & HSEM_C2ICR_ISC0) {
    HSEM->C2ICR = HSEM_C2ICR_ISC0;       // clear the pending flag
    HSEM_COMMON->ICR = HSEM_C2ICR_ISC0;  // same clear through the HSEM_COMMON view
  }
}
#endif

In this case the M4 core ends up stuck in an interrupt during the first loop iteration. It is normal that the M7 takes control first (it is the faster core) and then frees the semaphore while the M4 is waiting for it to be freed in the while (status_semaphore == 0) { } loop. However, the M4 never gets out of that wait, even though I cleared all the interrupts enabled by HAL_HSEM_ActivateNotification(__HAL_HSEM_SEMID_TO_MASK(HSEM_ID));. Here is the code of that function:

/*
 * Enable the interrupt notification for every semaphore whose bit is set in
 * SemMask.
 *
 * When USE_MULTI_CORE_SHARED_CODE is non-zero the mask is OR-ed into C1IER or
 * C2IER depending on which core HAL_GetCurrentCPUID() reports; otherwise it is
 * OR-ed into HSEM_COMMON->IER. Bits that are already set stay set.
 */
void HAL_HSEM_ActivateNotification(uint32_t SemMask)
{
#if  USE_MULTI_CORE_SHARED_CODE != 0U
  /*enable the semaphore mask interrupts */
  if (HAL_GetCurrentCPUID() == HSEM_CPU1_COREID)
  {
    /*Use interrupt line 0 for CPU1 Master */
    HSEM->C1IER |= SemMask;
  }
  else /* HSEM_CPU2_COREID */
  {
    /*Use interrupt line 1 for CPU2 Master*/
    HSEM->C2IER |= SemMask;
  }
#else
  /* Single-image build: the enable register is reached through HSEM_COMMON. */
  HSEM_COMMON->IER |= SemMask;
#endif
}

So, thinking that my trouble came only from the interrupts, I got rid of them and used a while loop that lets my code go further only once the semaphore is free. Here it is:

#ifdef CORE_CM7
#define HSEM_ID 0       // index of the hardware semaphore used as the lock
#define HSEM_PROCESS 0  // process ID passed to HAL_HSEM_Take() / HAL_HSEM_Release()

// Layout of the memory both cores exchange data through.
struct shared_data {
  uint32_t val;  // status: 1 = transfer complete, 0 = waiting
};

// Fixed address of the shared structure; the CM4 build uses the same address.
volatile shared_data *const buff = reinterpret_cast<volatile shared_data *>(0x38001000);
/**
 * One-time initialisation of the CM7 build: open the serial port at
 * 115200 baud, request the CM4 boot and enable the HSEM clock.
 */
void setup() {
  Serial.begin(115200);

  LL_RCC_ForceCM4Boot();
  __HAL_RCC_HSEM_CLK_ENABLE();

  // Left disabled by the author:
  //   SCB_CleanDCache_by_Addr(buff, sizeof(*buff));
}

/**
 * CM7 main loop: spin until the semaphore is taken, print buff->val, drive
 * the blue LED LOW then HIGH (0.5 s each), write 0 to buff->val, print it
 * again and release the semaphore.
 */
void loop() {
  // Busy-wait for the lock.
  do {
  } while (HAL_HSEM_Take(HSEM_ID, HSEM_PROCESS) != HAL_OK);

  Serial.println(buff->val);  // value seen on entry

  digitalWrite(LEDB, LOW);
  delay(500);
  digitalWrite(LEDB, HIGH);
  delay(500);

  buff->val = 0;
  Serial.println(buff->val);  // value after this core's write

  HAL_HSEM_Release(HSEM_ID, HSEM_PROCESS);
}
#endif
#ifdef CORE_CM4
#define HSEM_ID 0       // index of the hardware semaphore used as the lock
#define HSEM_PROCESS 0  // process ID passed to HAL_HSEM_Take() / HAL_HSEM_Release()
int temps = 0;          // not referenced by the code shown in this sketch

// Layout of the memory both cores exchange data through. The field type must
// be identical in both core builds because each one overlays this struct on
// the same address; the CM7 build declares `val` as uint32_t, so this one
// does too.
struct shared_data
{
  uint32_t val;  // status: 1 = transfer complete, 0 = waiting
};

// Fixed address of the shared structure; the CM7 build uses the same address.
volatile struct shared_data * const buff = (struct shared_data *)0x38001000;
/** CM4 build: no one-time initialisation is performed. */
void setup() {
  // Intentionally empty. Left disabled by the author:
  //   SCB_CleanDCache_by_Addr(buff, sizeof(*buff));
}

/**
 * CM4 main loop: spin until the semaphore is taken, write 1 to buff->val,
 * drive the blue LED LOW then HIGH (1 s each) and release the semaphore.
 */
void loop() {
  // Busy-wait for the lock.
  for (;;) {
    if (HAL_HSEM_Take(HSEM_ID, HSEM_PROCESS) == HAL_OK) {
      break;
    }
  }

  buff->val = 1;

  digitalWrite(LEDB, LOW);
  delay(1000);
  digitalWrite(LEDB, HIGH);
  delay(1000);

  HAL_HSEM_Release(HSEM_ID, HSEM_PROCESS);
}
#endif

This one works (the LEDs alternate their blink frequency according to which core is in the lead). Next, I tried to communicate data using a structure, buff, defined in both cores' code at the same address (within SRAM 3). Each core changes buff->val (1 for the M4 core and 0 for the M7 core), using the semaphore so that the two cores do not access that location at the same time. I print buff->val twice while the M7 holds the semaphore: once when it is supposed to hold the value left by the M4 (buff->val = 1?) and once after the M7 has written buff->val = 0. Yet the only results I get are 0s, as if the M4 were unable to change the value of buff->val while it holds the semaphore. Does anyone have an opinion on this? Here is what I found while browsing around:

  • SCB_CleanDCache_by_Addr(buff, sizeof(*buff)); it seems this line has to be used in order to flush data from the cache to memory, but I must admit I have no idea how to use it (the evidence being that it is commented out everywhere in my code :confused: ). I would be more than thankful for any insight.
  • Maybe the buff struct is not treated as one and the same struct by both cores ... I will try to check that by using RPC to print from the M4 while it holds the semaphore.

EDIT: It is not related to the mistake I just noticed, where I defined val as uint8_t...

EDIT2: It is definitely the same struct. Somehow the M4 seems able to read from it but not write to it? Why? How can I work around that? Both cores are supposed to have the same access rights to that part of the memory. Here is the code I used to check that the M4 is indeed able to read from that struct:

#ifdef CORE_CM7
#define HSEM_ID 0       // index of the hardware semaphore used as the lock
#define HSEM_PROCESS 0  // process ID passed to HAL_HSEM_Take() / HAL_HSEM_Release()

// Layout of the memory both cores exchange data through.
struct shared_data {
  uint32_t val;  // status: 1 = transfer complete, 0 = waiting
};

// Fixed address of the shared structure; the CM4 build uses the same address.
volatile shared_data *const buff = reinterpret_cast<volatile shared_data *>(0x38001000);

// Iteration counter; loop() derives the next value written to buff->val from it.
int c = 1;
/**
 * One-time initialisation of the CM7 build: open the serial port at
 * 115200 baud, request the CM4 boot and enable the HSEM clock.
 */
void setup() {
  // RPC.begin();  (disabled by the author)
  Serial.begin(115200);

  LL_RCC_ForceCM4Boot();
  __HAL_RCC_HSEM_CLK_ENABLE();

  // Left disabled by the author:
  //   SCB_CleanDCache_by_Addr(buff, sizeof(*buff));
}

/**
 * CM7 main loop: spin until the semaphore is taken, print buff->val, drive
 * the blue LED LOW then HIGH (0.5 s each), store (c*1000)%10000 in buff->val,
 * clean the data cache over buff, print the value again, release the
 * semaphore and advance c.
 */
void loop() {
  // Busy-wait for the lock.
  do {
  } while (HAL_HSEM_Take(HSEM_ID, HSEM_PROCESS) != HAL_OK);

  // Disabled by the author: forwarding of anything the M4 printed over RPC.
  //   while (RPC.available()) { Serial.write(RPC.read()); }

  Serial.println(buff->val);  // value seen on entry

  digitalWrite(LEDB, LOW);
  delay(500);
  digitalWrite(LEDB, HIGH);
  delay(500);

  const int next_value = (c * 1000) % 10000;
  buff->val = next_value;
  SCB_CleanDCache_by_Addr(buff, sizeof(*buff));  // clean the cache lines covering buff
  Serial.println(buff->val);

  HAL_HSEM_Release(HSEM_ID, HSEM_PROCESS);
  ++c;
}
#endif
#ifdef CORE_CM4
#define HSEM_ID 0       // index of the hardware semaphore used as the lock
#define HSEM_PROCESS 0  // process ID passed to HAL_HSEM_Take() / HAL_HSEM_Release()

int temps = 0;  // not referenced by the code shown in this sketch

// Layout of the memory both cores exchange data through.
struct shared_data {
  uint32_t val;  // status: 1 = transfer complete, 0 = waiting
};

// Fixed address of the shared structure; the CM7 build uses the same address.
volatile shared_data *const buff = reinterpret_cast<volatile shared_data *>(0x38001000);
/** CM4 build: no one-time initialisation is performed. */
void setup() {
  // Intentionally empty. Left disabled by the author:
  //   RPC.begin();
  //   SCB_CleanDCache_by_Addr(buff, sizeof(*buff));
}

/**
 * CM4 main loop: spin until the semaphore is taken, read buff->val once and
 * use it as the delay (in delay() units) for driving the blue LED LOW and
 * then HIGH, then release the semaphore.
 */
void loop() {
  // Busy-wait for the lock.
  for (;;) {
    if (HAL_HSEM_Take(HSEM_ID, HSEM_PROCESS) == HAL_OK) {
      break;
    }
  }

  // Single read of the shared value; both delays use this copy.
  int blink_delay = buff->val;

  digitalWrite(LEDB, LOW);
  delay(blink_delay);
  digitalWrite(LEDB, HIGH);
  delay(blink_delay);

  // Disabled by the author: RPC.println(buff->val);

  HAL_HSEM_Release(HSEM_ID, HSEM_PROCESS);
}
#endif

EDIT: It worked! I need to call SCB_CleanInvalidateDCache_by_Addr(buff, sizeof(*buff)); (on the M7, as in the code below) after the M4 part!!

Code:

//#include "RPC.h" 
#ifdef CORE_CM7
#define HSEM_ID 0       // index of the hardware semaphore used as the lock
#define HSEM_PROCESS 0  // process ID passed to HAL_HSEM_Take() / HAL_HSEM_Release()

// Layout of the memory both cores exchange data through.
struct shared_data {
  uint32_t datM4;  // written with 1 by the CM4 build, reset to 0 by the CM7 build
  uint32_t val;    // status: 1 = transfer complete, 0 = waiting
};

// Fixed address of the shared structure; the CM4 build uses the same address.
volatile shared_data *const buff = reinterpret_cast<volatile shared_data *>(0x38001000);

// Iteration counter; loop() derives the next value written to buff->val from it.
int c = 1;
/**
 * One-time initialisation of the CM7 build: open the serial port at
 * 115200 baud, request the CM4 boot and enable the HSEM clock.
 */
void setup() {
  // RPC.begin();  (disabled by the author)
  Serial.begin(115200);

  LL_RCC_ForceCM4Boot();
  __HAL_RCC_HSEM_CLK_ENABLE();

  // Left disabled by the author:
  //   SCB_CleanDCache_by_Addr(buff, sizeof(*buff));
}

/**
 * CM7 main loop.
 *
 * After taking the semaphore it clean-invalidates the data cache over buff,
 * prints buff->datM4, resets it to 0 and prints it again. It then drives the
 * blue LED LOW and HIGH (0.5 s each), stores (c*1000)%10000 in buff->val,
 * cleans the cache over buff, prints buff->val, releases the semaphore and
 * advances c.
 */
void loop() {
  // Busy-wait for the lock.
  do {
  } while (HAL_HSEM_Take(HSEM_ID, HSEM_PROCESS) != HAL_OK);

  // Disabled by the author: forwarding of anything the M4 printed over RPC.
  //   while (RPC.available()) { Serial.write(RPC.read()); }

  // Per the author's finding, this call is what makes the M4's write visible here.
  SCB_CleanInvalidateDCache_by_Addr(buff, sizeof(*buff));
  Serial.println(buff->datM4);  // value left by the other core
  buff->datM4 = 0;
  Serial.println(buff->datM4);  // value after the reset

  digitalWrite(LEDB, LOW);
  delay(500);
  digitalWrite(LEDB, HIGH);
  delay(500);

  const int next_value = (c * 1000) % 10000;
  buff->val = next_value;
  SCB_CleanDCache_by_Addr(buff, sizeof(*buff));  // clean the cache lines covering buff
  Serial.println(buff->val);

  HAL_HSEM_Release(HSEM_ID, HSEM_PROCESS);
  ++c;
}
#endif
#ifdef CORE_CM4
#define HSEM_ID 0       // index of the hardware semaphore used as the lock
#define HSEM_PROCESS 0  // process ID passed to HAL_HSEM_Take() / HAL_HSEM_Release()

int temps = 0;  // not referenced by the code shown in this sketch

// Layout of the memory both cores exchange data through.
struct shared_data {
  uint32_t datM4;  // written with 1 by this build, reset to 0 by the CM7 build
  uint32_t val;    // status: 1 = transfer complete, 0 = waiting
};

// Fixed address of the shared structure; the CM7 build uses the same address.
volatile shared_data *const buff = reinterpret_cast<volatile shared_data *>(0x38001000);
/** CM4 build: no one-time initialisation is performed. */
void setup() {
  // Intentionally empty. Left disabled by the author:
  //   RPC.begin();
  //   SCB_CleanDCache_by_Addr(buff, sizeof(*buff));
}

/**
 * CM4 main loop: spin until the semaphore is taken, read buff->val as the
 * blink delay, write 1 to buff->datM4, drive the blue LED LOW then HIGH
 * (one delay each) and release the semaphore.
 */
void loop() {
  // Busy-wait for the lock.
  for (;;) {
    if (HAL_HSEM_Take(HSEM_ID, HSEM_PROCESS) == HAL_OK) {
      break;
    }
  }

  // Single read of the shared value; both delays use this copy.
  int blink_delay = buff->val;
  buff->datM4 = 1;

  digitalWrite(LEDB, LOW);
  delay(blink_delay);
  digitalWrite(LEDB, HIGH);
  delay(blink_delay);

  // Disabled by the author: RPC.println(buff->val);

  HAL_HSEM_Release(HSEM_ID, HSEM_PROCESS);
}
#endif

I will measure the communication speed another day in order to check whether this is truly useful (I am expecting good results :slight_smile: ). Will keep you updated.

Edit: 1 to 2 µs to alternately modify a uint8_t value 1 000 000 times. Test code below:

#ifdef CORE_CM7
#define HSEM_ID 0       // index of the hardware semaphore used as the lock
#define HSEM_PROCESS 0  // process ID passed to HAL_HSEM_Take() / HAL_HSEM_Release()

// Layout of the memory both cores exchange data through.
struct shared_data {
  uint32_t datM4;  // written with 1 by the CM4 build, reset to 0 by the CM7 build
  uint32_t val;    // status: 1 = transfer complete, 0 = waiting
};

// Fixed address of the shared structure; the CM4 build uses the same address.
volatile shared_data *const buff = reinterpret_cast<volatile shared_data *>(0x38001000);

// Only referenced from disabled lines in this version of the sketch.
int c = 1;
/**
 * One-time initialisation of the CM7 build: open the serial port at
 * 115200 baud, request the CM4 boot and enable the HSEM clock.
 */
void setup() {
  // RPC.begin();  (disabled by the author)
  Serial.begin(115200);

  LL_RCC_ForceCM4Boot();
  __HAL_RCC_HSEM_CLK_ENABLE();

  // Left disabled by the author:
  //   SCB_CleanDCache_by_Addr(buff, sizeof(*buff));
}

/**
 * CM7 benchmark loop.
 *
 * Times 1,000,000 cycles of: take the semaphore, clean-invalidate the data
 * cache over buff, write 0 to buff->datM4, clean the cache over buff, release
 * the semaphore. The total elapsed time in microseconds is printed on Serial.
 */
void loop() {
  const uint32_t kIterations = 1000000;
  const unsigned long started_us = micros();

  // The loop condition must be the comparison and the step the increment.
  // With the two swapped (`i++` as the condition) the very first test
  // evaluates to 0, the body never runs, and only an empty loop is timed.
  for (uint32_t i = 0; i < kIterations; i++) {
    // Busy-wait for the lock.
    while (HAL_HSEM_Take(HSEM_ID, HSEM_PROCESS) != HAL_OK) {
    }

    // Per the author's finding, this call is what makes the M4's write visible here.
    SCB_CleanInvalidateDCache_by_Addr(buff, sizeof(*buff));
    buff->datM4 = 0;
    SCB_CleanDCache_by_Addr(buff, sizeof(*buff));  // clean the cache lines covering buff

    HAL_HSEM_Release(HSEM_ID, HSEM_PROCESS);
  }

  // Unsigned subtraction keeps the difference correct if micros() wraps
  // around during the measurement.
  const unsigned long elapsed_us = micros() - started_us;
  Serial.println(elapsed_us);
}
#endif
#ifdef CORE_CM4
#define HSEM_ID 0       // index of the hardware semaphore used as the lock
#define HSEM_PROCESS 0  // process ID passed to HAL_HSEM_Take() / HAL_HSEM_Release()

int temps = 0;  // not referenced by the code shown in this sketch

// Layout of the memory both cores exchange data through.
struct shared_data {
  uint32_t datM4;  // written with 1 by this build, reset to 0 by the CM7 build
  uint32_t val;    // status: 1 = transfer complete, 0 = waiting
};

// Fixed address of the shared structure; the CM7 build uses the same address.
volatile shared_data *const buff = reinterpret_cast<volatile shared_data *>(0x38001000);
/** CM4 build: no one-time initialisation is performed. */
void setup() {
  // Intentionally empty. Left disabled by the author:
  //   RPC.begin();
  //   SCB_CleanDCache_by_Addr(buff, sizeof(*buff));
}

/**
 * CM4 side of the benchmark: spin until the semaphore is taken, write 1 to
 * buff->datM4 and release the semaphore immediately. The LED blinking and the
 * RPC print of the earlier versions are disabled here.
 */
void loop() {
  // Busy-wait for the lock.
  for (;;) {
    if (HAL_HSEM_Take(HSEM_ID, HSEM_PROCESS) == HAL_OK) {
      break;
    }
  }

  buff->datM4 = 1;

  HAL_HSEM_Release(HSEM_ID, HSEM_PROCESS);
}
#endif

It's fast enough for me, so I'll stop here.

The HSEM should be used by the RPC mechanisms.
Or, if you look at the STM32 HAL examples, they also show how to use HSEM.

STM32 HSEM

STM HSEM link

Read this: https://www.st.com/resource/en/application_note/an5617-stm32h745755-and-stm32h747757-lines-interprocessor-communications-stmicroelectronics.pdf (they advise my method in the Notification part). HSEM never was and never will be about RPC only.

1 Like

This topic was automatically closed 180 days after the last reply. New replies are no longer allowed.