Well, I gave up trying to fix that stuff. The only thing I needed was to convert a byte-array containing a double (big endian) to float.
I wrote the following function to do this, and it seems to work, accurate down to the last bit. There are some limitations (see the code), but they don't matter for my usage.
I think it's pretty efficient, too.
/*
Copyright © 2017, aweatherguy
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
The name "aweatherguy" may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define USE_ROUNDING 1
#define BUF_IS_BIG_ENDIAN 1
#include <inttypes.h>
#ifndef USE_ROUNDING
#define USE_ROUNDING 1          /* default when the including file did not choose */
#endif
#ifndef BUF_IS_BIG_ENDIAN
#define BUF_IS_BIG_ENDIAN 1     /* default when the including file did not choose */
#endif

/*
 * Convert an 8-byte IEEE-754 double, stored as raw bytes in buf, to a float.
 *
 * buf     Pointer to the 8 bytes of the double. With BUF_IS_BIG_ENDIAN != 0
 *         buf[0] is the most significant byte (sign + upper exponent bits);
 *         otherwise buf[7] is. Only the five most significant bytes are read.
 * return  The converted value.
 *
 * Algorithm (identical on the AVR assembly path and the portable C path):
 *   1) Collect the 5 most significant bytes into a 40-bit word
 *      (1 sign bit, 11 exponent bits, top 28 mantissa bits).
 *   2) Remember the sign bit.
 *   3) If USE_ROUNDING: when bit 4 of the word (the first bit that will be
 *      discarded) is set, add 0x10 so the carry bumps the kept mantissa.
 *      This rounds half away from zero and ignores the 3 low bytes of the
 *      double; it is not IEEE round-to-nearest-even.
 *   4) Shift the word left by 3 and keep its upper 4 bytes. That leaves the
 *      low 9 exponent bits followed by the 23 float mantissa bits.
 *   5) Flip bit 30. For exponents inside the float range this is the same as
 *      re-biasing from 1023 to 127 (i.e. subtracting 896).
 *   6) Overwrite bit 31 with the remembered sign.
 *
 * Limitations (results are meaningless for these inputs):
 *   - zero, denormals, Inf and NaN are not recognised
 *   - no detection of exponents outside the float range
 */
__attribute__((noinline))
float PackedDouble2Float(uint8_t* buf)
{
    /* The routine builds the float's bit pattern; the union hands it back as a float. */
    union { uint32_t bits; float value; } result;

#if defined(__AVR__)
    uint8_t guard;  /* 5th loaded byte: holds the rounding bit, shifted away later */
    uint8_t tmp;    /* shift loop counter, then the exponent flip mask */

    /*
     * All three outputs need registers r16..r31 ("d") because LDI, SUBI and
     * SBCI only accept those. They are early-clobber ("&") because they are
     * written while the Z input is still in use.
     */
    __asm__ __volatile__ (
        /* 1) load the five most significant bytes, MSB into %D[res] */
#if BUF_IS_BIG_ENDIAN
        "ld   %D[res], Z      \n\t"
        "ldd  %C[res], Z+1    \n\t"
        "ldd  %B[res], Z+2    \n\t"
        "ldd  %A[res], Z+3    \n\t"
        "ldd  %[grd],  Z+4    \n\t"
#else
        "ldd  %D[res], Z+7    \n\t"
        "ldd  %C[res], Z+6    \n\t"
        "ldd  %B[res], Z+5    \n\t"
        "ldd  %A[res], Z+4    \n\t"
        "ldd  %[grd],  Z+3    \n\t"
#endif
        /* 2) park the sign bit in the T flag until the very end */
        "bst  %D[res], 7      \n\t"
#if USE_ROUNDING
        /* 3) round: skip the add unless bit 4 of the guard byte is set */
        "sbrs %[grd], 4       \n\t"
        "rjmp 1f              \n\t"
        /* AVR has no add-immediate: subtract -0x10 (0xFF..F0) instead */
        "subi %[grd],  0xF0   \n\t"
        "sbci %A[res], 0xFF   \n\t"
        "sbci %B[res], 0xFF   \n\t"
        "sbci %C[res], 0xFF   \n\t"
        "sbci %D[res], 0xFF   \n\t"
        "1:                   \n\t"
#endif
        /* 4) shift the 40-bit word left by three bits */
        "ldi  %[tmp], 3       \n\t"
        "2:                   \n\t"
        "lsl  %[grd]          \n\t"
        "rol  %A[res]         \n\t"
        "rol  %B[res]         \n\t"
        "rol  %C[res]         \n\t"
        "rol  %D[res]         \n\t"
        "dec  %[tmp]          \n\t"   /* DEC leaves carry alone, sets Z for BRNE */
        "brne 2b              \n\t"
        /* 5) flip bit 6 of the MSB: exponent re-bias */
        "ldi  %[tmp], 0x40    \n\t"
        "eor  %D[res], %[tmp] \n\t"
        /* 6) bit 7 of the MSB becomes the saved sign */
        "bld  %D[res], 7      \n\t"
        : [res] "=&d" (result.bits), [grd] "=&d" (guard), [tmp] "=&d" (tmp)
        : "z" (buf)
        : "memory"                    /* the asm reads *buf behind the compiler's back */
    );
#else
    /* Portable equivalent of the assembly above, e.g. for host-side tests. */
#if BUF_IS_BIG_ENDIAN
    uint64_t word = ((uint64_t)buf[0] << 32) | ((uint64_t)buf[1] << 24) |
                    ((uint64_t)buf[2] << 16) | ((uint64_t)buf[3] << 8)  |
                     (uint64_t)buf[4];
#else
    uint64_t word = ((uint64_t)buf[7] << 32) | ((uint64_t)buf[6] << 24) |
                    ((uint64_t)buf[5] << 16) | ((uint64_t)buf[4] << 8)  |
                     (uint64_t)buf[3];
#endif
    /* Sign is taken before rounding, exactly like the BST above. */
    const uint32_t sign = (uint32_t)((word >> 39) & 1u) << 31;

#if USE_ROUNDING
    if (word & 0x10u) {
        word += 0x10u;
    }
#endif
    /*
     * "Shift left 3, keep the upper 4 of 5 bytes" is bits 36..5 of the word;
     * the cast to 32 bits discards everything above bit 36.
     */
    result.bits = (uint32_t)(word >> 5);
    result.bits ^= UINT32_C(0x40000000);
    result.bits = (result.bits & UINT32_C(0x7FFFFFFF)) | sign;
#endif

    return result.value;
}