Self-Balancing Robot using LQr

Hello everyone, I am working on self balancing robot which uses LQR as control system.
I am updating the states of system every 6ms and then calculating the PWM to apply to the motors.
I am not able to balance it no matter how much time I spend to tune it. Tilt angle is pretty accurate and converges very quickly. I am using Kalman Filter for sensor fusion. Position and velocity is calculated using inbulit optical encoder in the motors. Gain matrix is calculated using Octave. In octave the bot is easily balanced in simulation. This is my code:

#include <MatrixMath.h>
#include <TimerOne.h>
#include <Wire.h>
#include <MPU6050.h>
#include <KalmanFilter.h>

MPU6050 mpu;

KalmanFilter kalmanX(0.001, 0.003, 0.03);
KalmanFilter kalmanY(0.001, 0.003, 0.03);

float accPitch = 0;
float accRoll = 0;

float kalPitch = 0;
float kalRoll = 0;


const int pinA1 = 3; //Right motor encoder pins
const int pinB1 = 4;

const int pinA2 = 18;  //Left motor encoder pins
const int pinB2 = 17;

volatile float cnt1 = 0, cnt2 = 0, c1 = 0, c2 = 0, error = 0, prevError = 0, distance1 = 0, distance2 = 0, prev_distance1 = 0, prev_distance2 = 0, velocity1 = 0, velocity2 = 0, yaw = 0;

float A[4][4] = {{1, 0.006, -0.0002149, -0.0000004298},     //State Matrix in discrete form calculated using Octave
  {0, 1, -0.0716,  -0.0002149},
  {0, 0,  0.9978, 0.005996},
  {0, 0, -0.7447, 0.9978}
};

float B[4][2] = {{0.00006116,  0.00006116},       ////Input Matrix in discrete form calculated using Octave
  {0.02039, 0.02039},
  { -0.0004424, -0.0004424},
  { -0.1474, -0.1474}
};

float K[2][4] = {{ 4.8743  ,  3.0856 , -25.1865 ,  -1.4309},    //Gain Matrix K, 2x4 because of two motors
  { 4.8743  ,  3.0856 , -25.1865 ,  -1.4309}

};

float BK[4][4], A_BK[4][4], X[4], X_dot[4], X_X_set[4], u[2]; //Matrices to calculate U which is our Input

float X_set[4] = {0, 0, -2.15, 0};  //{position, velocity, tilt_angle, angular_velocity}, -2.15 because the bot seems to be balanced at this angle.
float F1 = 0, F2 = 0, pitch_rate = 0, pitch_old = 0;


void setup()
{
  Serial.begin(9600);

  // Initialize MPU6050
  while (!mpu.begin(MPU6050_SCALE_250DPS, MPU6050_RANGE_2G))
  {
    delay(500);
  }

  // Calibrate gyroscope. The calibration must be at rest.
  // If you don't want calibrate, comment this line.
  mpu.calibrateGyro();

  pinMode(pinB1, INPUT_PULLUP);
  pinMode(pinA1, INPUT_PULLUP);
  pinMode(22, OUTPUT);
  pinMode(24, OUTPUT);
  pinMode(26, OUTPUT);
  pinMode(28, OUTPUT);
  digitalWrite(22, LOW);
  digitalWrite(24, HIGH);
  digitalWrite(26, LOW);
  digitalWrite(28, HIGH);
  pinMode(44, OUTPUT);
  pinMode(46, OUTPUT);
  analogWrite(44, 0);
  analogWrite(46, 0);
  Timer1.initialize(6000);
  Timer1.attachInterrupt(calculation);
  attachInterrupt(digitalPinToInterrupt(pinA1), counter1, RISING);
  attachInterrupt(digitalPinToInterrupt(pinA2), counter2, RISING);
}

void loop()
{
  Vector acc = mpu.readNormalizeAccel();
  Vector gyr = mpu.readNormalizeGyro();

  // Calculate Pitch & Roll from accelerometer (deg)
  accPitch = -(atan2(acc.XAxis, sqrt(acc.YAxis * acc.YAxis + acc.ZAxis * acc.ZAxis)) * 180.0) / M_PI;
  accRoll  = (atan2(acc.YAxis, acc.ZAxis) * 180.0) / M_PI;

  // Kalman filter
  kalPitch = kalmanY.update(accPitch, gyr.YAxis);
  kalRoll = kalmanX.update(accRoll, gyr.XAxis);

  Matrix.Multiply((float*)B, (float*)K, 4, 2, 4, (float*)BK);
  Matrix.Subtract((float*)A, (float*)BK, 4, 4, (float*)A_BK);
  Matrix.Multiply((float*)A_BK, (float*)X, 4, 4, 1, (float*)X_dot);
  Matrix.Subtract((float*)X, (float*)X_set, 4, 1, (float*)X_X_set);
  Matrix.Multiply((float*)K, (float*)X_X_set, 2, 4, 1, (float*)u);
  
  F1 = constrain(u[0], -255, 255);  //Constraing PWM to -255 to 255
  F2 = constrain(u[1], -255, 255);

  //After that every 6ms, a function(calculation) is called using interrupts to calculate new state and to apply PWM to motors
}

void counter1() {
  if (digitalRead(pinB1) == HIGH) {
    cnt1++;
  } else {
    cnt1--;
  }
}

void counter2() {
  if (digitalRead(pinB2) == HIGH) {
    cnt2--;
  } else {
    cnt2++;
  }
}

void calculation() {

  pitch_rate = (kalPitch - pitch_old) / 0.01;
  pitch_old = kalPitch;

  distance1 += (cnt1 / 270) * 0.2042;
  distance2 += (cnt2 / 270) * 0.2042;
  velocity1 = (distance1 - prev_distance1) / 0.004;
  velocity2 = (distance2 - prev_distance2) / 0.004;
  prev_distance1 = distance1;
  prev_distance2 = distance2;
  yaw = ((distance1 - distance2) / 1.20) * 360;
  cnt1 = 0;
  cnt2 = 0;
  X[0] = distance1;
  X[1] = velocity1;
  X[2] = kalPitch;
  X[3] = pitch_rate;
  MotorLeft(F1);
  MotorRight(F2);
}

void MotorLeft(float PWM) {
  if (PWM < 0) {
    PWM = abs(PWM);
    digitalWrite(22, HIGH);
    digitalWrite(24, LOW);
    analogWrite(44, PWM);
  } else {
    PWM = abs(PWM);
    digitalWrite(22, LOW);
    digitalWrite(24, HIGH);
    analogWrite(44, PWM);
  }
}

void MotorRight(float PWM) {
  if (PWM < 0) {
    PWM = abs(PWM);
    digitalWrite(26, HIGH);
    digitalWrite(28, LOW);
    analogWrite(46, PWM);
  } else {
    PWM = abs(PWM);
    digitalWrite(26, LOW);
    digitalWrite(28, HIGH);
    analogWrite(46, PWM);
  }
}

I have tried to explain everything in my code using comments. Feel free to ask any other question.