一次python和cpp的float32浮点乘法运算对齐

发布时间 2023-08-28 02:30:55作者: 咸鱼lzh

现象

1. python

Python 3.8.10 (default, Sep 28 2021, 16:10:42)
[GCC 9.3.0] on linux

import numpy as np
import struct

def hex_to_float32(hex_bits):
    """Return the np.float32 whose big-endian IEEE-754 bit pattern is *hex_bits*.

    *hex_bits* is a string of 8 hex digits, e.g. ``'3daaaaab'``.
    """
    return np.float32(struct.unpack('!f', bytes.fromhex(hex_bits))[0])


def float32_to_hex(value):
    """Return the bit pattern of the np.float32 *value* as 8 lowercase hex digits."""
    return format(value.view(np.uint32), '08x')


f_hex = '3daaaaab'
d13_hex = '4364e006'
ratio_x_hex = 'bf7142f2'
ratio_y_hex = '3eab37c5'
src_pts_hex = '443abbfc'

f = hex_to_float32(f_hex)
d13 = hex_to_float32(d13_hex)
ratio_x = hex_to_float32(ratio_x_hex)
ratio_y = hex_to_float32(ratio_y_hex)
src_pts = hex_to_float32(src_pts_hex)

# Echo the inputs as bit patterns to confirm they round-trip unchanged.
print(float32_to_hex(f),
      float32_to_hex(d13),
      float32_to_hex(ratio_x),
      float32_to_hex(ratio_y),
      float32_to_hex(src_pts))

# The constant is wrapped in np.float32 on purpose: with a bare Python int,
# NumPy releases before 2.0 (pre-NEP 50 promotion) turn `np.float32 * int`
# into float64, so the whole product chain would silently run in double
# precision and only the final cast would round to float32.
offset = f * np.float32(14) * d13 * ratio_x
result = np.float32(src_pts + offset)

print(result)

print(float32_to_hex(result))

output

3daaaaab 4364e006 bf7142f2 3eab37c5 443abbfc
495.2893
43f7a508

2. cpp

Apple clang version 14.0.3 (clang-1403.0.22.14.1)
Target: arm64-apple-darwin22.6.0

// Needs <cstdio> for printf and <cstring> for std::memcpy.

// Returns the float whose object representation equals `bits`.
// std::memcpy is used because dereferencing a reinterpret_cast'ed pointer to
// an unrelated type breaks the strict-aliasing rule (undefined behaviour).
static float float_from_bits(unsigned int bits) {
    static_assert(sizeof(float) == sizeof(unsigned int),
                  "float and unsigned int must have the same size");
    float value;
    std::memcpy(&value, &bits, sizeof(value));
    return value;
}

// Returns the object representation of `value` as an unsigned integer.
static unsigned int bits_from_float(float value) {
    unsigned int bits;
    std::memcpy(&bits, &value, sizeof(bits));
    return bits;
}

// Loads the inputs from their IEEE-754 bit patterns, evaluates the expression
// with float operands and prints the result as decimal and as a bit pattern.
int main() {
    unsigned int f_uint = 0x3daaaaab;
    unsigned int d13_uint = 0x4364e006;
    unsigned int ratio_x_uint = 0xbf7142f2;
    unsigned int ratio_y_uint = 0x3eab37c5;
    unsigned int src_pts_uint = 0x443abbfc;

    float f = float_from_bits(f_uint);
    float d13 = float_from_bits(d13_uint);
    float ratio_x = float_from_bits(ratio_x_uint);
    float ratio_y = float_from_bits(ratio_y_uint);
    float src_pts = float_from_bits(src_pts_uint);
    (void)ratio_y;  // loaded for parity with the other listings; not used below

    // Every operand is a float (the int literal 14 is converted to float).
    // NOTE(review): the compiler may contract the last multiply and the add
    // into one fused multiply-add (clang defaults to -ffp-contract=on), which
    // skips rounding the product to float. Worked by hand for these inputs,
    // the fused form gives 0x43f7a507 and the unfused float form 0x43f7a508;
    // confirm by rebuilding with -ffp-contract=off.
    float result = src_pts + f * 14 * d13 * ratio_x;
    printf("%f\n", result);
    unsigned int result_uint = bits_from_float(result);
    printf("%08x\n", result_uint);
    return 0;
}

output

495.289276
43f7a507

3. WQ5007 demo


#include <cstring>

#include "iot_io_api.h"
#include "iot_config_api.h"
#include "os_utils_api.h"
#include "iot_simd_api.h"


// Returns the float whose object representation equals `bits`.
// std::memcpy is used because dereferencing a reinterpret_cast'ed pointer to
// an unrelated type breaks the strict-aliasing rule (undefined behaviour).
static float float_from_bits(unsigned int bits)
{
    static_assert(sizeof(float) == sizeof(unsigned int),
                  "float and unsigned int must have the same size");
    float value;
    std::memcpy(&value, &bits, sizeof(value));
    return value;
}

// Returns the object representation of `value` as an unsigned integer.
static unsigned int bits_from_float(float value)
{
    unsigned int bits;
    std::memcpy(&bits, &value, sizeof(bits));
    return bits;
}

// Loads the inputs from their IEEE-754 bit patterns, evaluates the expression
// with float operands and prints the result through iot_printf, first as a
// decimal and then as a bit pattern.
void cpp_float_calc_test()
{
    unsigned int f_uint = 0x3daaaaab;
    unsigned int d13_uint = 0x4364e006;
    unsigned int ratio_x_uint = 0xbf7142f2;
    unsigned int ratio_y_uint = 0x3eab37c5;
    unsigned int src_pts_uint = 0x443abbfc;

    float f = float_from_bits(f_uint);
    float d13 = float_from_bits(d13_uint);
    float ratio_x = float_from_bits(ratio_x_uint);
    float ratio_y = float_from_bits(ratio_y_uint);
    float src_pts = float_from_bits(src_pts_uint);
    (void)ratio_y;  // loaded for parity with the other listings; not used below

    // Every operand is a float (the int literal 14 is converted to float).
    // NOTE(review): whether the last multiply and the add are fused into one
    // multiply-add, or the whole expression is folded at compile time, depends
    // on the toolchain and flags, which are not visible here. Worked by hand
    // for these inputs, the fused form gives 0x43f7a507 and the unfused float
    // form 0x43f7a508.
    float result = src_pts + f * 14 * d13 * ratio_x;
    iot_printf("%f\n", result);
    unsigned int result_uint = bits_from_float(result);
    iot_printf("%08x\n", result_uint);
}

// Demo entry point: runs the float calculation test once and always returns 0.
uint32_t app_demo_entry()
{
    const uint32_t status = 0;
    cpp_float_calc_test();
    return status;
}

output

495.289307
43f7a508

4. WQ5007 实际项目中

output

495.289307
43f7a507

5. 用double精度计算的准确结果

Apple clang version 14.0.3 (clang-1403.0.22.14.1)
Target: arm64-apple-darwin22.6.0

// Needs <cstdio> for printf and <cstring> for std::memcpy.

// Returns the float whose object representation equals `bits`.
// std::memcpy is used because dereferencing a reinterpret_cast'ed pointer to
// an unrelated type breaks the strict-aliasing rule (undefined behaviour).
static float float_from_bits(unsigned int bits) {
    static_assert(sizeof(float) == sizeof(unsigned int),
                  "float and unsigned int must have the same size");
    float value;
    std::memcpy(&value, &bits, sizeof(value));
    return value;
}

// Returns the object representation of `value` as an unsigned integer.
static unsigned int bits_from_float(float value) {
    unsigned int bits;
    std::memcpy(&bits, &value, sizeof(bits));
    return bits;
}

// Reference run: same inputs as the float listings, but the arithmetic is
// carried out in double and rounded to float only once, at the assignment.
int main() {
    unsigned int f_uint = 0x3daaaaab;
    unsigned int d13_uint = 0x4364e006;
    unsigned int ratio_x_uint = 0xbf7142f2;
    unsigned int ratio_y_uint = 0x3eab37c5;
    unsigned int src_pts_uint = 0x443abbfc;

    float f = float_from_bits(f_uint);
    float d13 = float_from_bits(d13_uint);
    float ratio_x = float_from_bits(ratio_x_uint);
    float ratio_y = float_from_bits(ratio_y_uint);
    float src_pts = float_from_bits(src_pts_uint);
    (void)ratio_y;  // loaded for parity with the other listings; not used below

    // 14.0 is a double literal, so the usual arithmetic conversions promote
    // the products and the sum to double; the float result is produced by the
    // single narrowing conversion in this initialisation.
    float result = src_pts + f * 14.0 * d13 * ratio_x;
    printf("%f\n", result);
    unsigned int result_uint = bits_from_float(result);
    printf("%08x\n", result_uint);
    return 0;
}

output

495.289307
43f7a508

现象描述

为避免浮点数打印带来的误差,参与运算的输入浮点数(f, d13, ratio_x, ratio_y, src_pts)都以二进制形式对齐,并限制精度为float32。但是结果还是不同。

  1. 跑在macOS上的C++和跑在Linux上的Python 3结果不同;
  2. 跑在WQ5007芯片上的C++和macOS上的C++结果不同;
  3. 不同上下文(WQ5007上的demo和实际项目中)结果不同。

原因

TODO

解决

TODO