Have you read AVR201? It contains a 16 x 16 = 24-bit multiplication routine, which is close to what you want to accomplish; you can probably skip a couple of lines:
;******************************************************************************
;*
;* FUNCTION
;*	mul16x16_24
;* DESCRIPTION
;*	Unsigned multiplication of two 16-bit numbers giving a 24-bit result.
;*	Only the low 24 bits of the product are kept; the high byte of
;*	ah * bh and any carry out of r18 are discarded.
;* USAGE
;*	r18:r17:r16 = r23:r22 * r21:r20
;*	(a = r23:r22 = ah:al, b = r21:r20 = bh:bl)
;* STATISTICS
;*	Cycles :	14 + ret
;*	Words :		10 + ret
;*	Register usage: r0 to r1, r16 to r18 and r20 to r23 (9 registers)
;* NOTE
;*	Fully orthogonal, i.e. any register pair can be used as long as
;*	the 24-bit result and the two operands do not share register pairs.
;*	The routine is non-destructive to the operands.
;*	r1:r0 are overwritten by every mul and are not restored.
;*
;******************************************************************************
mul16x16_24:
	; Weight 2^0: al * bl seeds the two low result bytes.
	mul	r20, r22		; bl * al
	movw	r17:r16, r1:r0

	; Weight 2^16: only the low byte of ah * bh fits into the result.
	mul	r21, r23		; bh * ah
	mov	r18, r0

	; Weight 2^8: both cross products are accumulated into r18:r17.
	mul	r20, r23		; bl * ah
	add	r17, r0
	adc	r18, r1

	mul	r22, r21		; al * bh
	add	r17, r0
	adc	r18, r1			; carry out of bit 23 is dropped

	ret