// samuel-MPVRL-HVAC - Wokwi ESP32, STM32, Arduino Simulator

// RL-Tuned MPC for Multi-Zone HVAC using ESP32 + Wokwi
// Components: 2x Servo, 2x DHT22, DIP Switch 8, I2C 20x4 LCD
// Version 2: Corrected control logic and simulation physics

#include <Wire.h>
#include <LiquidCrystal_I2C.h>
#include <DHT.h>
#include <ESP32Servo.h>

// ==== Constants ====
// Data pins handed to the two DHT driver objects (zone 1 and zone 2).
#define DHTPIN1 25
#define DHTPIN2 26
// Sensor model selector passed to the DHT constructor.
#define DHTTYPE DHT22
// Signal pins the two servos are attached to in setup().
#define SERVO1_PIN 18
#define SERVO2_PIN 19
// I2C address passed to the LiquidCrystal_I2C constructor.
#define I2C_ADDR 0x27

// Character geometry of the LCD (20 columns x 4 rows).
#define LCD_COLS 20
#define LCD_ROWS 4

// Number of candidate lambda values; sizes the lambdas, Q and N arrays.
#define NUM_LAMBDAS 5
// Number of simulated zones; sizes the zoneTemp array.
#define NUM_ZONES 2
// Probability with which chooseLambdaIndex() picks a random lambda.
#define EPSILON 0.2
// Step size used by updateQ() when moving an estimate toward a new reward.
#define ALPHA 0.1
// Setpoint = SETPOINT_BASE + (2-bit DIP value) * SETPOINT_STEP, i.e. 22..25.
#define SETPOINT_BASE 22.0
#define SETPOINT_STEP 1.0

// GPIO numbers of the 8 DIP switch positions, indexed by switch position.
// Usage in this file:
//   [0], [1] -> setpoint bits (low, high)        - getSetpoint()
//   [2]      -> manual / fixed-lambda mode       - useFixedLambda()
//   [3], [4] -> fixed lambda index bits          - getFixedLambdaIndex()
//   [5]      -> reset learned values             - resetRL()
//   [6], [7] -> configured as inputs but not read in the visible code
// NOTE(review): on the ESP32, GPIO34 and GPIO35 are documented as input-only
// pins without internal pull resistors, so the INPUT_PULLUP requested in
// initDIPSwitches() may have no effect for entries [2] and [3] on real
// hardware - confirm external pull-ups exist or remap these positions.
const int dipPins[8] = {32, 33, 35, 34, 27, 12, 13, 14};

// ==== Globals ====
// DHT22 drivers for the two zones. They are started in setup(), but the
// visible code never reads them: loop() takes temperatures from zoneTemp.
DHT dht1(DHTPIN1, DHTTYPE);
DHT dht2(DHTPIN2, DHTTYPE);
// One servo per zone; applyActuation() writes the control signal to them as an angle.
Servo servo1, servo2;
// 20x4 character LCD on the I2C bus.
LiquidCrystal_I2C lcd(I2C_ADDR, LCD_COLS, LCD_ROWS);

// Candidate lambda values the controller can run with (the bandit's actions).
float lambdas[NUM_LAMBDAS] = {0.1, 0.5, 1.0, 2.0, 5.0};
// Running reward estimate for each lambda; starts at 0 and is adjusted by updateQ().
float Q[NUM_LAMBDAS] = {0};
// Number of updateQ() calls per lambda (updates happen only in AUTO mode).
int N[NUM_LAMBDAS] = {0};

// Simulated state variables
// Current temperature of each zone; advanced by applyActuation().
float zoneTemp[NUM_ZONES] = {15.0, 15.0}; // Start below every selectable setpoint

// ==== Functions ====

// Decodes DIP positions 0 (low bit) and 1 (high bit) into the target temperature.
// Returns SETPOINT_BASE plus 0..3 steps of SETPOINT_STEP, i.e. 22, 23, 24 or 25.
float getSetpoint() {
  const int lowBit = digitalRead(dipPins[0]);
  const int highBit = digitalRead(dipPins[1]);
  const int steps = lowBit + 2 * highBit;
  return SETPOINT_BASE + steps * SETPOINT_STEP;
}

// Reports whether Manual Mode (fixed lambda, no learning) is selected.
// Returns true when DIP position 2 reads HIGH.
// NOTE(review): the pin is configured INPUT_PULLUP, so an open switch
// presumably also reads HIGH - confirm the wiring gives the intended polarity.
bool useFixedLambda() {
  const int modeLevel = digitalRead(dipPins[2]);
  return modeLevel == HIGH;
}

// Decodes DIP positions 3 (low bit) and 4 (high bit) into an index into lambdas[].
// The result is clamped to 0..NUM_LAMBDAS-1. Two bits only encode 0..3, so the
// last entry (index 4) cannot be selected this way.
int getFixedLambdaIndex() {
  const int lowBit = digitalRead(dipPins[3]);
  const int highBit = digitalRead(dipPins[4]);
  const int rawIndex = lowBit + 2 * highBit;

  const int lastIndex = NUM_LAMBDAS - 1;
  if (rawIndex < 0) {
    return 0;
  }
  if (rawIndex > lastIndex) {
    return lastIndex;
  }
  return rawIndex;
}

// Reports whether the learned values should be wiped.
// Returns true when DIP position 5 reads HIGH.
// NOTE(review): the pin is configured INPUT_PULLUP, so an open switch
// presumably also reads HIGH - confirm the wiring gives the intended polarity.
bool resetRL() {
  const int resetLevel = digitalRead(dipPins[5]);
  return resetLevel == HIGH;
}

// Configures every DIP switch GPIO listed in dipPins as an input with the
// internal pull-up requested.
void initDIPSwitches() {
  for (const int pin : dipPins) {
    pinMode(pin, INPUT_PULLUP);
  }
}

// One-time initialisation: serial log, sensors, servos, LCD and DIP inputs,
// followed by a 2-second splash screen.
void setup() {
  Serial.begin(115200);
  // CSV header; the columns match the row printed at the end of every loop() cycle.
  Serial.println("Time,T1,T2,U1,U2,Lambda,Reward,Q0,Q1,Q2,Q3,Q4");
  // The DHT drivers are started here although loop() does not read them.
  dht1.begin();
  dht2.begin();
  servo1.attach(SERVO1_PIN);
  servo2.attach(SERVO2_PIN);
  lcd.init();
  lcd.backlight();
  initDIPSwitches();
  // Splash screen on rows 0 and 1, held for 2 s before loop() starts.
  lcd.clear();
  lcd.setCursor(0, 0);
  lcd.print("RL HVAC Controller");
  lcd.setCursor(0, 1);
  lcd.print("V2 - Corrected");
  delay(2000);
}

// Epsilon-greedy selection of an index into lambdas[].
// A draw in 0.000..0.999 (steps of 0.001) below EPSILON returns a random index;
// otherwise the index with the largest Q estimate is returned, the lowest
// index winning ties.
int chooseLambdaIndex() {
  const double roll = random(1000) / 1000.0;
  if (roll < EPSILON) {
    return random(NUM_LAMBDAS);
  }

  // Greedy branch: linear arg-max scan. Estimates at or below the -999999
  // floor are never picked, which leaves index 0 as the fallback.
  float winnerQ = -999999.0;
  int winner = 0;
  int candidate = 0;
  while (candidate < NUM_LAMBDAS) {
    if (Q[candidate] > winnerQ) {
      winner = candidate;
      winnerQ = Q[candidate];
    }
    ++candidate;
  }
  return winner;
}

// Heating-only proportional controller standing in for the MPC.
//   currentTemp - present zone temperature
//   lambda      - damping weight; the temperature deficit is divided by (1 + lambda),
//                 so a larger lambda gives a gentler command
// Returns the heater command in 0..1, and exactly 0 when the zone is at or
// above the DIP-selected setpoint.
float computeMPC(float currentTemp, float lambda) {
  const float deficit = getSetpoint() - currentTemp;
  if (deficit <= 0) {
    return 0.0;  // no heating demand
  }

  float demand = deficit / (1.0 + lambda);

  // Saturate to the actuator range 0..1.
  if (demand < 0) {
    demand = 0;
  } else if (demand > 1) {
    demand = 1;
  }
  return demand;
}

// Drives one zone's servo and advances that zone's simulated temperature by one step.
//   control - heater command, expected in 0..1
//   servo   - servo belonging to the zone
//   zoneIdx - index into zoneTemp[]; not range-checked here
// Returns the zone's new temperature, which is also stored in zoneTemp[zoneIdx].
float applyActuation(float control, Servo &servo, int zoneIdx) {
  // A command of 0..1 becomes a servo angle of 0..180 degrees.
  const int servoAngle = map(control * 100, 0, 100, 0, 180);
  servo.write(servoAngle);

  // First-order heating model:
  //   T_next = retention * T + heaterGain * control + jitter
  // retention  - fraction of the previous temperature kept each step; with
  //              control = 0 the value shrinks by 2% per step (toward 0, as
  //              the model has no ambient-temperature term).
  // heaterGain - temperature added per step at full command; positive, so
  //              heating raises the temperature.
  // jitter     - integer drawn by random(-10, 11), scaled by 0.01.
  const float retention = 0.98;
  const float heaterGain = 4.0;
  const float jitter = random(-10, 11) * 0.01;

  float &temp = zoneTemp[zoneIdx];
  temp = retention * temp + heaterGain * control + jitter;
  return temp;
}

// Scores one zone's outcome; the value is never positive and larger is better.
//   temp    - zone temperature after actuation
//   control - heater command that was applied
//   lambda  - weight on the energy term
// Returns -((temp - setpoint)^2 + lambda * control^2), using the setpoint
// currently selected on the DIP switches.
// NOTE(review): lambda is also the quantity being selected by the learner, so
// a smaller lambda lowers its own energy penalty - confirm this is intended.
float computeReward(float temp, float control, float lambda) {
  // Squares are written as plain float multiplications: pow(x, 2) promotes its
  // arguments to double, which is needless work for a simple square.
  const float tempError = temp - getSetpoint();

  // Squared error, so large deviations from the setpoint cost disproportionately more.
  const float comfort_penalty = tempError * tempError;

  // Energy use, weighted by lambda.
  const float energy_penalty = lambda * (control * control);

  return -(comfort_penalty + energy_penalty);
}

// Moves the estimate for the chosen lambda a fraction ALPHA of the way toward
// the latest reward (a constant-step running average; no next-state term is
// involved) and counts the update in N.
//   lambdaIdx - index into Q[] / N[]; not range-checked here
//   reward    - reward observed for this cycle
void updateQ(int lambdaIdx, float reward) {
  float &estimate = Q[lambdaIdx];
  estimate += ALPHA * (reward - estimate);
  ++N[lambdaIdx];
}

void displayStatus(float t1, float t2, float u1, float u2, float lambda, float reward) {
  lcd.clear();
  lcd.setCursor(0, 0);
  lcd.printf("Z1=%.1fC u1=%.0f%%", t1, u1 * 100);
  lcd.setCursor(0, 1);
  lcd.printf("Z2=%.1fC u2=%.0f%%", t2, u2 * 100);
  lcd.setCursor(0, 2);
  lcd.printf("lambda=%.1f r=%.2f", lambda, reward);
  lcd.setCursor(0, 3);
  lcd.printf("SP=%.1f Mode:%s", getSetpoint(), useFixedLambda() ? "MAN" : "AUTO");
}

// One control cycle, followed by a 1-second delay:
// choose lambda -> compute both zone commands -> apply them to the simulated
// zones -> score the result -> learn (AUTO mode only) -> LCD + CSV output.
// While the reset switch is active, the cycle is replaced by clearing Q and N.
void loop() {
  // Reset request: wipe the learned values and skip this control cycle.
  if (resetRL()) {
    for (int i = 0; i < NUM_LAMBDAS; i++) {
      Q[i] = 0;
      N[i] = 0;
    }
    lcd.setCursor(0, 2);
    // Padded with spaces to the full 20-column row so that no characters of
    // the previous (longer) status line stay visible behind the message.
    lcd.print("RL Memory Reset!    ");
    delay(1500);
    return;
  }

  // Sample the mode switch once per cycle. Reading it separately for the
  // lambda choice and for the learning step could credit a manually selected
  // lambda to the learner (or skip a learned one) if the switch is flipped
  // in between.
  const bool manualMode = useFixedLambda();

  // 1. CHOOSE ACTION: fixed lambda in manual mode, epsilon-greedy otherwise
  const int lambdaIdx = manualMode ? getFixedLambdaIndex() : chooseLambdaIndex();
  const float lambda = lambdas[lambdaIdx];

  // Temperatures come from the simulation state, not from the DHT sensors.
  const float t1 = zoneTemp[0];
  const float t2 = zoneTemp[1];

  // 2. COMPUTE CONTROL: heater command (0..1) for each zone
  const float u1 = computeMPC(t1, lambda);
  const float u2 = computeMPC(t2, lambda);

  // 3. APPLY & SIMULATE: move the servos and step the simulated temperatures
  const float newT1 = applyActuation(u1, servo1, 0);
  const float newT2 = applyActuation(u2, servo2, 1);

  // 4. GET REWARD: sum of both zones' rewards
  const float reward = computeReward(newT1, u1, lambda) + computeReward(newT2, u2, lambda);

  // 5. LEARN: only in AUTO mode, so manually forced lambdas do not alter Q
  if (!manualMode) {
    updateQ(lambdaIdx, reward);
  }

  // 6. DISPLAY: LCD status plus one CSV row matching the header printed in setup()
  displayStatus(newT1, newT2, u1, u2, lambda, reward);
  static int timeStep = 0;  // counts completed control cycles; reset cycles are not counted
  Serial.printf("%d,%.2f,%.2f,%.2f,%.2f,%.1f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f\n",
                timeStep++,
                newT1,
                newT2,
                u1,
                u2,
                lambda,
                reward,
                Q[0], Q[1], Q[2], Q[3], Q[4]);

  delay(1000);
}