MPU-6050 not working with slow serial comm

I'm having strange issues with the MPU-6050, using the I2Cdev & MPU-6050 libraries from Jeff R.
It all works fine if my Serial port is 115200, but at lower speeds it hangs up on the "Initializing I2C devices..." message.
At 38400 it sometimes starts and sometimes doesn't. This is the fastest baud rate I can use (I'm using the Sabertooth 2x25).
I really don't understand all the workings of this, but I wonder if there's a way to slow down the Arduino/MPU-6050 communication or something. I don't really need readings as fast as it gives them. My first version of a balancing bot worked great with readings only every 50 ms.
Here's my code. It all seems to work at 115200 baud (only tested by reading the serial monitor, since I can't communicate with the Sabertooth at that speed).
I'm really stumped!
thanks

#include "Wire.h"
// I2Cdev and MPU6050 must be installed as libraries, or else the .cpp/.h files
// for both classes must be in the include path of your project
#include "I2Cdev.h"
#include "MPU6050_6Axis_MotionApps20.h"
//#include "MPU6050.h" // not necessary if using MotionApps include file
MPU6050 mpu;
/* =========================================================================
 NOTE: In addition to connecting 3.3v, GND, SDA, and SCL, this sketch
 depends on the MPU-6050's INT pin being connected to the Arduino's
 external interrupt #0 pin.
 ========================================================================= */
// uncomment "OUTPUT_READABLE_YAWPITCHROLL" if you want to see the yaw/
// pitch/roll angles (in degrees) calculated from the quaternions coming
// from the FIFO. Note this also requires gravity vector calculations.
// Also note that yaw/pitch/roll angles suffer from gimbal lock (for
// more info, see: http://en.wikipedia.org/wiki/Gimbal_lock)
#define OUTPUT_READABLE_YAWPITCHROLL
// MPU control/status vars
bool dmpReady = false;  // set true if DMP init was successful
uint8_t mpuIntStatus;   // holds actual interrupt status byte from MPU
uint8_t devStatus;      // return status after each device operation (0 = success, !0 = error)
uint16_t packetSize;    // expected DMP packet size (default is 42 bytes)
uint16_t fifoCount;     // count of all bytes currently in FIFO
uint8_t fifoBuffer[64]; // FIFO storage buffer
// orientation/motion vars
Quaternion q;           // [w, x, y, z]         quaternion container
VectorInt16 aa;         // [x, y, z]            accel sensor measurements
VectorInt16 aaReal;     // [x, y, z]            gravity-free accel sensor measurements
VectorInt16 aaWorld;    // [x, y, z]            world-frame accel sensor measurements
VectorFloat gravity;    // [x, y, z]            gravity vector
float euler[3];         // [psi, theta, phi]    Euler angle container
float ypr[3];           // [yaw, pitch, roll]   yaw/pitch/roll container and gravity vector
// ================================================================
// ===               INTERRUPT DETECTION ROUTINE                ===
// ================================================================
// Flag shared between the interrupt handler below and loop(). It is declared
// volatile because it is written from interrupt context and polled from the
// main loop.
volatile bool mpuInterrupt = false;     // indicates whether MPU interrupt pin has gone high
// Interrupt handler registered in setup() via attachInterrupt(); it only
// raises the flag, and loop() clears it again before servicing the MPU.
void dmpDataReady() {
  mpuInterrupt = true;
}
// ================================================================
// ===                      KEENEWAY SETUP                      ===
// ================================================================
#include <SoftwareSerial.h>
// Software serial link to the motor driver: RX on D4, TX on D12
// (D12 is used to send commands to the Sabertooth).
SoftwareSerial Segway(4,12);
boolean debugMode = false;  // set to true to print debug data over Serial
// Latest integer angles in degrees: loop() writes them from ypr[1] and ypr[2],
// update_motor() reads them to compute the motor commands.
int pitch, turn;

// ================================================================
// ===                      INITIAL SETUP                       ===
// ================================================================

// One-time initialization: starts I2C and both serial ports, initializes the
// MPU-6050 and its DMP, and attaches the data-ready interrupt handler.
// On success dmpReady is set to true and packetSize is filled in; on failure
// the DMP status code is printed and dmpReady stays false, which makes loop()
// return immediately on every pass.
void setup() {
  // join I2C bus (I2Cdev library doesn't do this automatically)
  Wire.begin();

  // initialize serial communication
  Serial.begin(115200);       // hardware serial, used here for status/debug output
  Segway.begin(9600);         // begin communication with Segway motor driver (SoftwareSerial)
  // initialize device
  Serial.println(F("Initializing I2C devices..."));
  mpu.initialize();

  // verify connection
  Serial.println(F("Testing device connections..."));
  Serial.println(mpu.testConnection() ? F("MPU6050 connection successful") : F("MPU6050 connection failed"));

  // wait for ready
  // NOTE: the blocking "wait for data" step below is commented out, so despite
  // the prompt this sketch does not actually wait for a character; it only
  // discards whatever is already buffered and carries on.
  Serial.println(F("\nSend any character to begin DMP programming and demo: "));
  while (Serial.available() && Serial.read()); // empty buffer
  //while (!Serial.available());                 // wait for data
  while (Serial.available() && Serial.read()); // empty buffer again

  // load and configure the DMP
  Serial.println(F("Initializing DMP..."));
  devStatus = mpu.dmpInitialize();

  // make sure it worked (returns 0 if so)
  if (devStatus == 0) {
    // turn on the DMP, now that it's ready
    Serial.println(F("Enabling DMP..."));
    mpu.setDMPEnabled(true);

    // enable Arduino interrupt detection: dmpDataReady() runs on each rising
    // edge of external interrupt 0
    Serial.println(F("Enabling interrupt detection (Arduino external interrupt 0)..."));
    attachInterrupt(0, dmpDataReady, RISING);
    mpuIntStatus = mpu.getIntStatus();

    // set our DMP Ready flag so the main loop() function knows it's okay to use it
    Serial.println(F("DMP ready! Waiting for first interrupt..."));
    dmpReady = true;

    // get expected DMP packet size for later comparison
    packetSize = mpu.dmpGetFIFOPacketSize();
  } 
  else {
    // ERROR!
    // 1 = initial memory load failed
    // 2 = DMP configuration updates failed
    // (if it's going to break, usually the code will be 1)
    Serial.print(F("DMP Initialization failed (code "));
    Serial.print(devStatus);
    Serial.println(F(")"));
  }
}



// ================================================================
// ===                    MAIN PROGRAM LOOP                     ===
// ================================================================

void loop() {
  // if programming failed, don't try to do anything
  if (!dmpReady) return;

  // wait for MPU interrupt or extra packet(s) available
  while (!mpuInterrupt && fifoCount < packetSize) {
    // other program behavior stuff here
    update_motor();
    }

    // reset interrupt flag and get INT_STATUS byte
    mpuInterrupt = false;
  mpuIntStatus = mpu.getIntStatus();

  // get current FIFO count
  fifoCount = mpu.getFIFOCount();

  // check for overflow (this should never happen unless our code is too inefficient)
  if ((mpuIntStatus & 0x10) || fifoCount == 1024) {
    // reset so we can continue cleanly
    mpu.resetFIFO();
    if (debugMode) {
      Serial.println(F("FIFO overflow!"));
    }
    // otherwise, check for DMP data ready interrupt (this should happen frequently)
  } 
  else if (mpuIntStatus & 0x02) {
    // wait for correct available data length, should be a VERY short wait
    while (fifoCount < packetSize) fifoCount = mpu.getFIFOCount();

    // read a packet from FIFO
    mpu.getFIFOBytes(fifoBuffer, packetSize);

    // track FIFO count here in case there is > 1 packet available
    // (this lets us immediately read more without waiting for an interrupt)
    fifoCount -= packetSize;

#ifdef OUTPUT_READABLE_YAWPITCHROLL
    // display Euler angles in degrees
    mpu.dmpGetQuaternion(&q, fifoBuffer);
    mpu.dmpGetGravity(&gravity, &q);
    mpu.dmpGetYawPitchRoll(ypr, &q, &gravity);
    pitch = (int(ypr[1] * 180/M_PI));
    turn = (int(ypr[2] * 180/M_PI));
    if (debugMode) {
      Serial.print("Pitch\t");
      Serial.print(int(ypr[1] * 180/M_PI));
      Serial.print("\tturn\t");
      Serial.println(int(ypr[2] * 180/M_PI));
    }
#endif
  }
}

void update_motor(){  // Update the motors
  int maxPitch = 30;
  int maxTurn = 10; //the max angle for the most turn
  turn = constrain(turn, -maxTurn, maxTurn);

  if (pitch > 60) crash(); //we probably crashed
  if (pitch > maxPitch) tooFast(); //we are going too fast, so try to slow it down
  int motor_speed = map(pitch, -maxPitch, maxPitch, -64, 64); // map the angle to the sabertooth range 1-64  
  int motor1 = motor_speed + (turn);  //add steering bias to motor 1
  int motor2 = motor_speed - (turn);  //add steering bias to motor 2
  // assign final motor output values
  motor1 = 64 + motor1;                //64 is neutral for motor 1
  motor2 = 192 + motor2;               //192 is neutral for motor 2
  motor1 = constrain(motor1, 1, 127);  //constrain the value to it's min/max
  motor2 = constrain(motor2, 128, 255);//constrain the value to it's min/max
  Serial.print(motor1); //Send motor 1 speed over serial
  Serial.print("\t");
  //delay(1);  //sabertooth can only receive commands at 2000/second
  Serial.println(motor2);  //Send motor 2 speed over serial
}

// Clamps the global turn angle in place to the range -10..10 degrees.
// Nothing is returned; callers read the adjusted value from turn.
void turnAdjust() {
  const int turnLimit = 10;  // the max angle for the most turn
  if (turn < -turnLimit) {
    turn = -turnLimit;
  } else if (turn > turnLimit) {
    turn = turnLimit;
  }
}

PS. That SoftwareSerial call at 9600 makes it fail too. If I change it to a higher speed or just remove it, it works. Once I get it working, I won't be using Serial, only that SoftwareSerial port.

I discovered that with the slower Serial Comm speed, this code is never getting into this while statement:

while (!mpuInterrupt && fifoCount < packetSize) {
    // other program behavior stuff here
    update_motor();
 }

If I move my routine outside of this while statement, it works. In the serial monitor, I get a FIFO overflow error, but it still works and gives me good readings, so I guess this doesn't matter. I think I vaguely understand the problem: the slower serial speed means the code can't keep up with the data coming from the IMU? Anyway, I don't think I need data that fast for my application, so I guess the FIFO overflow won't matter?

You can find all the answers that you need over here.

... if you can be arsed to read 105 pages of comments ...