Just to prove it to myself, I made this amended sketch:
#include <avr/sleep.h>
#include <avr/power.h>
const byte tick = 3;
// interrupt on Timer 2 compare "A" completion - does nothing
EMPTY_INTERRUPT (TIMER2_COMPA_vect);
void setup()
{
pinMode (tick, OUTPUT);
// clock input to timer 2 from XTAL1/XTAL2
ASSR = _BV (AS2);
// set up timer 2 to count up to 32 * 1024 (32768)
TCCR2A = _BV (WGM21); // CTC
TCCR2B = _BV (CS20) | _BV (CS21) | _BV (CS22); // Prescaler of 1024
OCR2A = 31; // count to 32 (zero-relative)
// enable timer interrupts
TIMSK2 |= _BV (OCIE2A);
// disable ADC
ADCSRA = 0;
// turn off everything we can
power_adc_disable ();
power_spi_disable();
power_twi_disable();
power_timer0_disable();
power_timer1_disable();
power_usart0_disable();
// full power-down doesn't respond to Timer 2
set_sleep_mode (SLEEP_MODE_PWR_SAVE);
// get ready ...
sleep_enable();
} // end of setup
void loop()
{
// turn off brown-out enable in software
MCUCR = _BV (BODS) | _BV (BODSE);
MCUCR = _BV (BODS);
// sleep, finally!
sleep_cpu ();
// we awoke! pulse the clock hand
digitalWrite (tick, ! digitalRead (tick));
} // end of loop
With the 32.768 kHz crystal, the LED (on pin 3) toggles every second. Since this is under software control, you could do anything you want in the main loop instead of the digitalWrite (e.g. update a display).
The current consumption, when the LED is not lit, was 1.46 uA, which is pretty low.
(Edit) And 1.1 uA with a 3.3 V power supply.