The left board is the master and the right board is the slave. Both boards are Arduino Nano 33 BLE.
Master Code:
#include <Wire.h> // Include Wire library for I2C
#include "Fastest_Path_Grid_Layout.h"
// Output pin for each grid state's LED, indexed by state number.
// States 5, 6, 9 and 10 hold 0 -- presumably cells without an LED of their own
// (e.g. walls); TODO confirm. Init() and LEDOff() still drive pin 0 for them.
int led_pins[NUM_STATES] = {2, 3, 4, 5, 6, 0, 0, 7, 8, 0, 0, 9, 10, 11, 12, 13}; // LEDs connected to pins D2-D13
// Q-table: one learned value per (state, action) pair; zeroed in Init().
float Q[NUM_STATES][NUM_ACTIONS];
int master_state = 0; // State this (master) agent currently occupies
int slave_state = -1; // Last state reported by the slave; -1 until the first read
int start = 0; // State the master returns to after reaching the goal
int ep_max = 50; // Number of episodes to run before printing the policy and halting
int ep_count = 1; // Current episode (incremented on reaching the goal or the hole)
int time_delay = 100; // Pause handed to delay(), in milliseconds
float learn_rate = 0.3; // Learning rate applied in updateQ()
float discount = 0.9; // Discount factor applied in updateQ()
float greedy = 0.3; // Probability that chooseAction() picks a random action (exploration rate)
/*
  One-time startup for the master board: prepare the LED pins and the
  Q-table, join the I2C bus as master, and open the serial monitor.
*/
void setup() {
  Init();              // LED pins become outputs, Q-table is zeroed
  Wire.begin();        // no address argument: join the I2C bus as master
  Serial.begin(9600);  // serial monitor at 9600 baud
  delay(3000);         // wait 3 s before the first loop() pass
}
/*
Main Program
*/
void loop() {
Serial.print("Master State: ");
Serial.println(master_state);
slave_state = getStateSlave();
// Wire.requestFrom(SLAVE_ADDR, ANSWERSIZE);
// if (Wire.available() >= 2) {
// slave_state = Wire.read() << 8 | Wire.read();
// }
// Serial.print("Slave State: ");
// Serial.println(slave_state);
digitalWrite(led_pins[master_state], HIGH); // Turn on the LED of the current state
digitalWrite(led_pins[slave_state], HIGH); // Turn on the LED of the slave state
int action = chooseAction(master_state); // Choose an action for the agent
int next_state = getNextState(master_state, action, slave_state); // Determines the next state
float reward;
if (next_state == goal) { // Goal is reached
reward = 1;
Serial.print("Episode: ");
Serial.println(ep_count);
ep_count++;
} else if (next_state == hole) {
reward = -1;
Serial.print("Episode: ");
Serial.println(ep_count);
ep_count++;
} else { // Movement penalty
reward = -0.04;
}
updateQ(master_state, action, reward, next_state); // Update Q-table
master_state = next_state;
sendStateSlave();
delay(time_delay);
if (master_state == goal) { // Reset to Start Once Goal is reached
LEDOff();
digitalWrite(led_pins[master_state], HIGH);
delay(time_delay);
master_state = start;
}
LEDOff(); // Turn off all LEDs
// if (ep_count == 50) time_delay = 2000;
if (ep_count > ep_max) {
Serial.println("Finished!");
Serial.println();
int current_state = 0;
// Print in 4x4 grid
for (int row = 0; row < 4; row++) {
for (int col = 0; col < 4; col++) {
float current_max = -5.0; // Use very low initial value
int best_action = -1;
for (int k = 0; k < NUM_ACTIONS; k++) {
if (Q[current_state][k] > current_max && Q[current_state][k] != 0) {
current_max = Q[current_state][k];
best_action = k;
}
}
// Print max Q and best action
Serial.print("S");
Serial.print(current_state);
Serial.print(": ");
Serial.print(current_max, 2); // Print with 2 decimal places
Serial.print(" (");
switch (best_action) {
case 0: Serial.print("↑"); break;
case 1: Serial.print("→"); break;
case 2: Serial.print("←"); break;
case 3: Serial.print("↓"); break;
default: Serial.print("N/A"); break;
}
Serial.print(")\t");
current_state++;
}
Serial.println();
}
while (1); // Halt program
}
}
/*================================ Master Functions ============================================================================*/
/*
  Prepares the board for a run: every pin listed in led_pins is
  configured as an output and every Q-table entry is set to 0.
*/
void Init() {
  // LED pins first...
  for (int state = 0; state < NUM_STATES; state++) {
    pinMode(led_pins[state], OUTPUT);
  }
  // ...then clear the whole Q-table.
  for (int state = 0; state < NUM_STATES; state++) {
    for (int action = 0; action < NUM_ACTIONS; action++) {
      Q[state][action] = 0;
    }
  }
}
/*
  Drives every pin listed in led_pins LOW, switching all LEDs off.
*/
void LEDOff() {
  for (int pin : led_pins) {
    digitalWrite(pin, LOW);
  }
}
/*
  Chooses the next action for the agent (epsilon-greedy).

  With probability `greedy` (30%) a uniformly random action is returned.
  Otherwise the action with the highest Q-value in state `current` is
  returned; ties for the highest value -- including the all-zero row of
  a state that has never been updated -- are broken at random.

  current: state whose row of the Q-table is consulted.
  Returns an action index in 0..NUM_ACTIONS-1.
*/
int chooseAction(int current){
  // Explore: take a random action 30% of the time.
  if (random(100) < greedy * 100) {
    return random(NUM_ACTIONS);
  }

  // Exploit: find the highest Q-value. The search is seeded from action 0
  // rather than from 0.0 so that rows containing only negative values
  // (common, because every ordinary move is penalised) are still ranked.
  float best_value = Q[current][0];
  for (int a = 1; a < NUM_ACTIONS; a++) {
    if (Q[current][a] > best_value) {
      best_value = Q[current][a];
    }
  }

  // Count how many actions share that value, then pick one of them at random.
  int tie_count = 0;
  for (int a = 0; a < NUM_ACTIONS; a++) {
    if (Q[current][a] == best_value) {
      tie_count++;
    }
  }
  int pick = random(tie_count);
  for (int a = 0; a < NUM_ACTIONS; a++) {
    if (Q[current][a] == best_value) {
      if (pick == 0) {
        return a;
      }
      pick--;
    }
  }
  return 0;  // Not reached for finite Q-values; keeps every path returning
}
/*
  Returns the state the agent ends up in after taking `action` from
  `current` on the 4x4 grid (states numbered row by row, 0..15).

  current: state the agent is in now.
  action:  0 = up, 1 = right, 2 = left, any other value = down.
  slave:   state occupied by the slave agent.

  The agent stays in `current` when the move would leave the grid, or
  when the target cell is a wall or is occupied by the slave.
*/
int getNextState(int current, int action, int slave) {
  const int grid_width = 4;
  const int grid_height = 4;
  // Row/column arithmetic covers every edge cell, including the
  // bottom-right corner (state 15), so a move can never leave the grid.
  const int row = current / grid_width;
  const int col = current % grid_width;

  int next;
  if (action == 0) {  // Move Up
    if (row == 0) {
      return current;
    }
    next = current - grid_width;
  } else if (action == 1) {  // Move Right
    if (col == grid_width - 1) {
      return current;
    }
    next = current + 1;
  } else if (action == 2) {  // Move Left
    if (col == 0) {
      return current;
    }
    next = current - 1;
  } else {  // Move Down
    if (row == grid_height - 1) {
      return current;
    }
    next = current + grid_width;
  }

  // Stay in current state if next is a wall or slave
  for (int i = 0; i < size_wall; i++) {
    if (next == wall[i]) {
      return current;
    }
  }
  if (next == slave) {
    return current;
  }
  return next;
}
/*
Updates the Q-Table for the current state and action
*/
void updateQ(int current, int action, float reward, int next) {
Q[current][action] += learn_rate * (reward + discount * Q[next][chooseAction(next)] - Q[current][action]);
}
/*================================ Communication with Slave ====================================================================*/
/*
  Transmits master_state to the slave over I2C as two bytes,
  upper byte first.
*/
void sendStateSlave() {
  const uint8_t upper = highByte(master_state);
  const uint8_t lower = lowByte(master_state);

  Wire.beginTransmission(SLAVE_ADDR);
  Wire.write(upper);
  Wire.write(lower);
  Wire.endTransmission();
}
/*
  Gets the state of the Slave.

  Requests ANSWERSIZE bytes from the slave and decodes the first two as a
  16-bit value, upper byte first.

  Returns the slave's state, or -1 when fewer than two bytes arrived.
*/
int getStateSlave() {
  int receivedValue = -1;  // Reported when the slave does not answer in full
  Wire.requestFrom(SLAVE_ADDR, ANSWERSIZE);
  if (Wire.available() >= 2) {
    // Read the bytes in separate statements: the two operands of `|` have no
    // guaranteed evaluation order, so reading both in one expression could
    // swap the upper and lower byte.
    const int high = Wire.read();
    const int low = Wire.read();
    // Go through int16_t so a negative state keeps its sign on boards where
    // int is wider than 16 bits.
    receivedValue = static_cast<int16_t>((high << 8) | low);
  }
  return receivedValue;
}
Slave Code:
#include <Wire.h> // Include Wire library for I2C
#include "Fastest_Path_Grid_Layout.h"
#define SLAVE_ADDR 9 // I2C address this board joins the bus with (see setup())
#define ANSWERSIZE 2 // Slave answer size in bytes (requestEvent() writes two)
#define NUM_STATES 16 // Number of grid cells; the policy is printed as a 4x4 grid
#define NUM_ACTIONS 4 // 0: Up, 1: Right, 2: Left, 3: Down
// Q-table: one learned value per (state, action) pair.
float Q[NUM_STATES][NUM_ACTIONS];
volatile int master_state = -1; // Last state received from the master (written in receiveEvent()); -1 until then
volatile bool newData = false; // Set by receiveEvent(); loop() takes one step and clears it
int slave_state = 2; // State this (slave) agent currently occupies
int start = 2; // State the slave returns to after reaching the goal (state 2)
int ep_max = 50; // Number of episodes to run before printing the policy and halting
int ep_count = 1; // Current episode (incremented on reaching the goal or the hole)
int time_delay = 20; // Pause handed to delay(), in milliseconds
float learn_rate = 0.3; // Learning rate applied in updateQ()
float discount = 0.9; // Discount factor applied in updateQ()
float greedy = 0.3; // Probability that chooseAction() picks a random action (exploration rate)
/*
  One-time startup for the slave board: join the I2C bus at SLAVE_ADDR,
  register the two bus callbacks, and open the serial monitor.
*/
void setup() {
  Wire.begin(SLAVE_ADDR);        // an address argument makes this board an I2C slave
  Wire.onRequest(requestEvent);  // called when the master asks for data
  Wire.onReceive(receiveEvent);  // called when the master sends data
  Serial.begin(9600);            // serial monitor at 9600 baud
}
/*================================ Communication with Master ===================================================================*/
/*
  I2C receive callback: stores the master's state.

  numBytes: number of bytes the master sent.

  Messages shorter than two bytes are ignored. Otherwise the first two
  bytes (upper byte first) become master_state and newData is raised so
  that loop() takes one step.
*/
void receiveEvent(int numBytes) {
  if (numBytes < 2) {
    return;  // incomplete message: leave master_state and newData untouched
  }
  const int upper = Wire.read();
  const int lower = Wire.read();
  master_state = (upper << 8) | lower;
  newData = true;
}
/*
  I2C request callback: answers the master with slave_state as two
  bytes, upper byte first.
*/
void requestEvent() {
  const uint8_t upper = highByte(slave_state);
  const uint8_t lower = lowByte(slave_state);
  Wire.write(upper);
  Wire.write(lower);
}
/*
  Main Program

  The slave only acts after receiveEvent() has flagged a new master
  state. It then takes one step: choose an action, work out the next
  state and reward, update the Q-table and move. Once ep_count exceeds
  ep_max the learned policy is printed as a 4x4 grid and the program
  halts.
*/
void loop() {
  // Nothing to do until the master has sent a fresh state.
  if (!newData) {
    return;
  }
  newData = false;

  Serial.print("Slave State: ");
  Serial.println(slave_state);

  const int action = chooseAction(slave_state);
  const int next_state = getNextState(slave_state, action, master_state);

  // Reward: +1 for the goal, -1 for the hole, small penalty for any other move.
  float reward = -0.04;
  if (next_state == goal) {
    reward = 1;
  } else if (next_state == hole) {
    reward = -1;
  }

  // Reaching the goal or the hole counts as a finished episode.
  if (next_state == goal || next_state == hole) {
    Serial.print("Episode: ");
    Serial.println(ep_count);
    ep_count++;
  }

  updateQ(slave_state, action, reward, next_state);
  slave_state = next_state;
  delay(time_delay);

  // Only the goal sends the agent back to its start state.
  if (slave_state == goal) {
    delay(time_delay);
    slave_state = start;
  }

  if (ep_count <= ep_max) {
    return;
  }

  // Training finished: print the best non-zero Q-value and action per state.
  Serial.println("Finished!");
  Serial.println();

  static const char *const arrows[] = {"↑", "→", "←", "↓"};
  const int grid_side = 4;
  for (int state = 0; state < grid_side * grid_side; state++) {
    // Entries still at exactly 0 are skipped, so a state with no non-zero
    // entry prints -5.00 and "N/A".
    float top_value = -5.0;
    int top_action = -1;
    for (int a = 0; a < NUM_ACTIONS; a++) {
      const float value = Q[state][a];
      if (value != 0 && value > top_value) {
        top_value = value;
        top_action = a;
      }
    }

    Serial.print("S");
    Serial.print(state);
    Serial.print(": ");
    Serial.print(top_value, 2);  // two decimal places
    Serial.print(" (");
    if (top_action >= 0 && top_action < 4) {
      Serial.print(arrows[top_action]);
    } else {
      Serial.print("N/A");
    }
    Serial.print(")\t");

    // End the line after the last column of each row.
    if (state % grid_side == grid_side - 1) {
      Serial.println();
    }
  }

  while (1);  // Halt program
}
/*================================ Slave Functions =============================================================================*/
/*
  Chooses the next action for the agent (epsilon-greedy).

  With probability `greedy` (30%) a uniformly random action is returned.
  Otherwise the action with the highest Q-value in state `current` is
  returned; ties for the highest value -- including the all-zero row of
  a state that has never been updated -- are broken at random.

  current: state whose row of the Q-table is consulted.
  Returns an action index in 0..NUM_ACTIONS-1.
*/
int chooseAction(int current){
  // Explore: take a random action 30% of the time.
  if (random(100) < greedy * 100) {
    return random(NUM_ACTIONS);
  }

  // Exploit: find the highest Q-value. The search is seeded from action 0
  // rather than from 0.0 so that rows containing only negative values
  // (common, because every ordinary move is penalised) are still ranked.
  float best_value = Q[current][0];
  for (int a = 1; a < NUM_ACTIONS; a++) {
    if (Q[current][a] > best_value) {
      best_value = Q[current][a];
    }
  }

  // Count how many actions share that value, then pick one of them at random.
  int tie_count = 0;
  for (int a = 0; a < NUM_ACTIONS; a++) {
    if (Q[current][a] == best_value) {
      tie_count++;
    }
  }
  int pick = random(tie_count);
  for (int a = 0; a < NUM_ACTIONS; a++) {
    if (Q[current][a] == best_value) {
      if (pick == 0) {
        return a;
      }
      pick--;
    }
  }
  return 0;  // Not reached for finite Q-values; keeps every path returning
}
/*
  Returns the state the agent ends up in after taking `action` from
  `current` on the 4x4 grid (states numbered row by row, 0..15).

  current: state the agent is in now.
  action:  0 = up, 1 = right, 2 = left, any other value = down.
  master:  state occupied by the master agent.

  The agent stays in `current` when the move would leave the grid, or
  when the target cell is a wall or is occupied by the master.
*/
int getNextState(int current, int action, int master) {
  const int grid_width = 4;
  const int grid_height = 4;
  // Row/column arithmetic covers every edge cell, including the
  // bottom-right corner (state 15), so a move can never leave the grid.
  const int row = current / grid_width;
  const int col = current % grid_width;

  int next;
  if (action == 0) {  // Move Up
    if (row == 0) {
      return current;
    }
    next = current - grid_width;
  } else if (action == 1) {  // Move Right
    if (col == grid_width - 1) {
      return current;
    }
    next = current + 1;
  } else if (action == 2) {  // Move Left
    if (col == 0) {
      return current;
    }
    next = current - 1;
  } else {  // Move Down
    if (row == grid_height - 1) {
      return current;
    }
    next = current + grid_width;
  }

  // Stay in current state if next is a wall or is master
  for (int i = 0; i < size_wall; i++) {
    if (next == wall[i]) {
      return current;
    }
  }
  if (next == master) {
    return current;
  }
  return next;
}
/*
Updates the Q-Table for the current state and action
*/
void updateQ(int current, int action, float reward, int next) {
Q[current][action] += learn_rate * (reward + discount * Q[next][chooseAction(next)] - Q[current][action]);
}
/*================================ Communication with Slave ====================================================================*/
// /*
// Sends the final state of the Master
// */
// void sendStateSlave() {
// Wire.beginTransmission(SLAVE_ADDR);
// Wire.write(master_state);
// Wire.endTransmission();
// }
// /*
// Gets the state of the Slave
// */
// int getStateSlave() {
// Wire.requestFrom(SLAVE_ADDR, ANSWERSIZE);
// String response = "";
// while(Wire.available()) {
// char b = Wire.read();
// response += b;
// }
// return response.toInt();
// }
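Fastest_Path_Grid_Layout.h is described as being just for the LED pin layout and is not shown here. Note, however, that both sketches use goal, hole, wall and size_wall (and the master also uses NUM_STATES, NUM_ACTIONS, SLAVE_ADDR and ANSWERSIZE) without defining them, so those presumably come from this header as well.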