I have used both - bought the 328 because it was a few pennies cheaper than the 328P. I modified a couple files - avrdude.conf and boards.txt in Arduino.
In the Boards.txt I copied the uno section and changed it to uno328. Change was this line - uno.build.mcu=atmega328p to uno328.build.mcu=atmega328
##############################################################
uno328.name=Arduino328
uno328.upload.protocol=arduino
uno328.upload.maximum_size=32256
uno328.upload.speed=115200
uno328.bootloader.low_fuses=0xff
uno328.bootloader.high_fuses=0xde
uno328.bootloader.extended_fuses=0x05
uno328.bootloader.path=optiboot
uno328.bootloader.file=optiboot_atmega328.hex
uno328.bootloader.unlock_bits=0x3F
uno328.bootloader.lock_bits=0x0F
uno328.build.mcu=atmega328
uno328.build.f_cpu=16000000L
uno328.build.core=arduino
uno328.build.variant=standard
for AVRDude.conf i copied the ATMega329P section and modified the name and the signature lines.
#------------------------------------------------------------
ATmega328
#------------------------------------------------------------
part
id = "m328";
desc = "ATMEGA328";
has_debugwire = yes;
flash_instr = 0xB6, 0x01, 0x11;
eeprom_instr = 0xBD, 0xF2, 0xBD, 0xE1, 0xBB, 0xCF, 0xB4, 0x00,
0xBE, 0x01, 0xB6, 0x01, 0xBC, 0x00, 0xBB, 0xBF,
0x99, 0xF9, 0xBB, 0xAF;
stk500_devcode = 0x86;
avr910_devcode = 0x;
signature = 0x1e 0x95 0x14;
pagel = 0xd7;
bs2 = 0xc2;
chip_erase_delay = 9000;
pgm_enable = "1 0 1 0 1 1 0 0 0 1 0 1 0 0 1 1",
"x x x x x x x x x x x x x x x x";
chip_erase = "1 0 1 0 1 1 0 0 1 0 0 x x x x x",
"x x x x x x x x x x x x x x x x";
timeout = 200;
stabdelay = 100;
cmdexedelay = 25;
synchloops = 32;
bytedelay = 0;
pollindex = 3;
pollvalue = 0x53;
predelay = 1;
postdelay = 1;
pollmethod = 1;
pp_controlstack =
0x0E, 0x1E, 0x0F, 0x1F, 0x2E, 0x3E, 0x2F, 0x3F,
0x4E, 0x5E, 0x4F, 0x5F, 0x6E, 0x7E, 0x6F, 0x7F,
0x66, 0x76, 0x67, 0x77, 0x6A, 0x7A, 0x6B, 0x7B,
0xBE, 0xFD, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00;
hventerstabdelay = 100;
progmodedelay = 0;
latchcycles = 5;
togglevtg = 1;
poweroffdelay = 15;
resetdelayms = 1;
resetdelayus = 0;
hvleavestabdelay = 15;
resetdelay = 15;
chiperasepulsewidth = 0;
chiperasepolltimeout = 10;
programfusepulsewidth = 0;
programfusepolltimeout = 5;
programlockpulsewidth = 0;
programlockpolltimeout = 5;
memory "eeprom"
paged = no;
page_size = 4;
size = 1024;
min_write_delay = 3600;
max_write_delay = 3600;
readback_p1 = 0xff;
readback_p2 = 0xff;
read = " 1 0 1 0 0 0 0 0",
" 0 0 0 x x x a9 a8",
" a7 a6 a5 a4 a3 a2 a1 a0",
" o o o o o o o o";
write = " 1 1 0 0 0 0 0 0",
" 0 0 0 x x x a9 a8",
" a7 a6 a5 a4 a3 a2 a1 a0",
" i i i i i i i i";
loadpage_lo = " 1 1 0 0 0 0 0 1",
" 0 0 0 0 0 0 0 0",
" 0 0 0 0 0 0 a1 a0",
" i i i i i i i i";
writepage = " 1 1 0 0 0 0 1 0",
" 0 0 x x x x a9 a8",
" a7 a6 a5 a4 a3 a2 0 0",
" x x x x x x x x";
mode = 0x41;
delay = 20;
blocksize = 4;
readsize = 256;
;
memory "flash"
paged = yes;
size = 32768;
page_size = 128;
num_pages = 256;
min_write_delay = 4500;
max_write_delay = 4500;
readback_p1 = 0xff;
readback_p2 = 0xff;
read_lo = " 0 0 1 0 0 0 0 0",
" 0 0 a13 a12 a11 a10 a9 a8",
" a7 a6 a5 a4 a3 a2 a1 a0",
" o o o o o o o o";
read_hi = " 0 0 1 0 1 0 0 0",
" 0 0 a13 a12 a11 a10 a9 a8",
" a7 a6 a5 a4 a3 a2 a1 a0",
" o o o o o o o o";
loadpage_lo = " 0 1 0 0 0 0 0 0",
" 0 0 0 x x x x x",
" x x a5 a4 a3 a2 a1 a0",
" i i i i i i i i";
loadpage_hi = " 0 1 0 0 1 0 0 0",
" 0 0 0 x x x x x",
" x x a5 a4 a3 a2 a1 a0",
" i i i i i i i i";
writepage = " 0 1 0 0 1 1 0 0",
" 0 0 a13 a12 a11 a10 a9 a8",
" a7 a6 x x x x x x",
" x x x x x x x x";
mode = 0x41;
delay = 6;
blocksize = 128;
readsize = 256;
;
memory "lfuse"
size = 1;
min_write_delay = 4500;
max_write_delay = 4500;
read = "0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0",
"x x x x x x x x o o o o o o o o";
write = "1 0 1 0 1 1 0 0 1 0 1 0 0 0 0 0",
"x x x x x x x x i i i i i i i i";
;
memory "hfuse"
size = 1;
min_write_delay = 4500;
max_write_delay = 4500;
read = "0 1 0 1 1 0 0 0 0 0 0 0 1 0 0 0",
"x x x x x x x x o o o o o o o o";
write = "1 0 1 0 1 1 0 0 1 0 1 0 1 0 0 0",
"x x x x x x x x i i i i i i i i";
;
memory "efuse"
size = 1;
min_write_delay = 4500;
max_write_delay = 4500;
read = "0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0",
"x x x x x x x x x x x x x o o o";
write = "1 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0",
"x x x x x x x x x x x x x i i i";
;
memory "lock"
size = 1;
min_write_delay = 4500;
max_write_delay = 4500;
read = "0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0",
"x x x x x x x x x x o o o o o o";
write = "1 0 1 0 1 1 0 0 1 1 1 x x x x x",
"x x x x x x x x 1 1 i i i i i i";
;
memory "calibration"
size = 1;
read = "0 0 1 1 1 0 0 0 0 0 0 x x x x x",
"0 0 0 0 0 0 0 0 o o o o o o o o";
;
memory "signature"
size = 3;
read = "0 0 1 1 0 0 0 0 0 0 0 x x x x x",
"x x x x x x a1 a0 o o o o o o o o";
;
;
If you make those additions then Arduino can use the ArduinoISP to load without having to change the files back and forth to load them the next time. It also allows you to use AVRDude to load programs directly if you wish.
Color codes don't work inside code blocks...