Problem with Serial on Arduino Pro 328 3.3V 8MHz

I gather that the U2X bit was being used as a quick cheat for compiling the bootloader (hey, if it all works at 16MHz, just set the U2X bit on the 8MHz parts, and we don't have to make any other #define's in the code). I'll look more into the bootloader later.

Here is the fixed HardwareSerial.h and HardwareSerial.cpp:

HardwareSerial.h

/*
  HardwareSerial.h - Hardware serial library for Wiring
  Copyright (c) 2006 Nicholas Zambetti.  All right reserved.

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*/

#ifndef HardwareSerial_h
#define HardwareSerial_h

#include <inttypes.h>

#include "Print.h"

struct ring_buffer;

class HardwareSerial : public Print
{
  private:
    ring_buffer *_rx_buffer;
    volatile uint8_t *_ubrrh;
    volatile uint8_t *_ubrrl;
    volatile uint8_t *_ucsra;
    volatile uint8_t *_ucsrb;
    volatile uint8_t *_udr;
    uint8_t _rxen;
    uint8_t _txen;
    uint8_t _rxcie;
    uint8_t _udre;
    uint8_t _u2x;
  public:
    HardwareSerial(ring_buffer *rx_buffer,
      volatile uint8_t *ubrrh, volatile uint8_t *ubrrl,
      volatile uint8_t *ucsra, volatile uint8_t *ucsrb,
      volatile uint8_t *udr,
      uint8_t rxen, uint8_t txen, uint8_t rxcie, uint8_t udre, uint8_t u2x);
    void begin(long);
    uint8_t available(void);
    int read(void);
    void flush(void);
    virtual void write(uint8_t);
    using Print::write; // pull in write(str) and write(buf, size) from Print
};

extern HardwareSerial Serial;

#if defined(__AVR_ATmega1280__)
extern HardwareSerial Serial1;
extern HardwareSerial Serial2;
extern HardwareSerial Serial3;
#endif

#endif

and HardwareSerial.cpp

/*
  HardwareSerial.cpp - Hardware serial library for Wiring
  Copyright (c) 2006 Nicholas Zambetti.  All right reserved.

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  
  Modified 23 November 2006 by David A. Mellis
*/

#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include "wiring.h"
#include "wiring_private.h"

#include "HardwareSerial.h"

// Define constants and variables for buffering incoming serial data.  We're
// using a ring buffer (I think), in which rx_buffer_head is the index of the
// location to which to write the next incoming character and rx_buffer_tail
// is the index of the location from which to read.
#define RX_BUFFER_SIZE 128

struct ring_buffer {
  unsigned char buffer[RX_BUFFER_SIZE];
  int head;
  int tail;
};

ring_buffer rx_buffer = { { 0 }, 0, 0 };

#if defined(__AVR_ATmega1280__)
ring_buffer rx_buffer1 = { { 0 }, 0, 0 };
ring_buffer rx_buffer2 = { { 0 }, 0, 0 };
ring_buffer rx_buffer3 = { { 0 }, 0, 0 };
#endif

inline void store_char(unsigned char c, ring_buffer *rx_buffer)
{
      int i = (rx_buffer->head + 1) % RX_BUFFER_SIZE;

      // if we should be storing the received character into the location
      // just before the tail (meaning that the head would advance to the
      // current location of the tail), we're about to overflow the buffer
      // and so we don't write the character or advance the head.
      if (i != rx_buffer->tail) {
            rx_buffer->buffer[rx_buffer->head] = c;
            rx_buffer->head = i;
      }
}

#if defined(__AVR_ATmega1280__)

SIGNAL(SIG_USART0_RECV)
{
      unsigned char c = UDR0;
  store_char(c, &rx_buffer);
}

SIGNAL(SIG_USART1_RECV)
{
      unsigned char c = UDR1;
  store_char(c, &rx_buffer1);
}

SIGNAL(SIG_USART2_RECV)
{
      unsigned char c = UDR2;
  store_char(c, &rx_buffer2);
}

SIGNAL(SIG_USART3_RECV)
{
      unsigned char c = UDR3;
  store_char(c, &rx_buffer3);
}

#else

#if defined(__AVR_ATmega8__)
SIGNAL(SIG_UART_RECV)
#else
SIGNAL(USART_RX_vect)
#endif
{
#if defined(__AVR_ATmega8__)
      unsigned char c = UDR;
#else
      unsigned char c = UDR0;
#endif
  store_char(c, &rx_buffer);
}

#endif

// Constructors ////////////////////////////////////////////////////////////////

HardwareSerial::HardwareSerial(ring_buffer *rx_buffer,
  volatile uint8_t *ubrrh, volatile uint8_t *ubrrl,
  volatile uint8_t *ucsra, volatile uint8_t *ucsrb,
  volatile uint8_t *udr,
  uint8_t rxen, uint8_t txen, uint8_t rxcie, uint8_t udre, uint8_t u2x)
{
  _rx_buffer = rx_buffer;
  _ubrrh = ubrrh;
  _ubrrl = ubrrl;
  _ucsra = ucsra;
  _ucsrb = ucsrb;
  _udr = udr;
  _rxen = rxen;
  _txen = txen;
  _rxcie = rxcie;
  _udre = udre;
  _u2x = u2x;
}

// Public Methods //////////////////////////////////////////////////////////////

void HardwareSerial::begin(long speed)
{
      *_ubrrh = ((F_CPU / 16 + speed / 2) / speed - 1) >> 8;
      *_ubrrl = ((F_CPU / 16 + speed / 2) / speed - 1);
      cbi(*_ucsra, _u2x);
  sbi(*_ucsrb, _rxen);
  sbi(*_ucsrb, _txen);
  sbi(*_ucsrb, _rxcie);
}

uint8_t HardwareSerial::available(void)
{
      return (RX_BUFFER_SIZE + _rx_buffer->head - _rx_buffer->tail) % RX_BUFFER_SIZE;
}

int HardwareSerial::read(void)
{
      // if the head isn't ahead of the tail, we don't have any characters
      if (_rx_buffer->head == _rx_buffer->tail) {
            return -1;
      } else {
            unsigned char c = _rx_buffer->buffer[_rx_buffer->tail];
            _rx_buffer->tail = (_rx_buffer->tail + 1) % RX_BUFFER_SIZE;
            return c;
      }
}

void HardwareSerial::flush()
{
      // don't reverse this or there may be problems if the RX interrupt
      // occurs after reading the value of rx_buffer_head but before writing
      // the value to rx_buffer_tail; the previous value of rx_buffer_head
      // may be written to rx_buffer_tail, making it appear as if the buffer
      // don't reverse this or there may be problems if the RX interrupt
      // occurs after reading the value of rx_buffer_head but before writing
      // the value to rx_buffer_tail; the previous value of rx_buffer_head
      // may be written to rx_buffer_tail, making it appear as if the buffer
      // were full, not empty.
      _rx_buffer->head = _rx_buffer->tail;
}

void HardwareSerial::write(uint8_t c)
{
      while (!((*_ucsra) & (1 << _udre)))
            ;

      *_udr = c;
}

// Preinstantiate Objects //////////////////////////////////////////////////////

#if defined(__AVR_ATmega8__)
HardwareSerial Serial(&rx_buffer, &UBRRH, &UBRRL, &UCSRA, &UCSRB, &UDR, RXEN, TXEN, RXCIE, UDRE, U2X);
#else
HardwareSerial Serial(&rx_buffer, &UBRR0H, &UBRR0L, &UCSR0A, &UCSR0B, &UDR0, RXEN0, TXEN0, RXCIE0, UDRE0, U2X0);
#endif

#if defined(__AVR_ATmega1280__)
HardwareSerial Serial1(&rx_buffer1, &UBRR1H, &UBRR1L, &UCSR1A, &UCSR1B, &UDR1, RXEN1, TXEN1, RXCIE1, UDRE1, U2X1);
HardwareSerial Serial2(&rx_buffer2, &UBRR2H, &UBRR2L, &UCSR2A, &UCSR2B, &UDR2, RXEN2, TXEN2, RXCIE2, UDRE2, U2X2);
HardwareSerial Serial3(&rx_buffer3, &UBRR3H, &UBRR3L, &UCSR3A, &UCSR3B, &UDR3, RXEN3, TXEN3, RXCIE3, UDRE3, U2X3);
#endif

Compiled and tested on atmega328P @ 16MHz.

b