数字信号处理：在ARM CMSIS-DSP库中使用FFT进行实时音频频谱分析的定点数优化

时间：2026-05-19 16:31:37

关键字：数字信号 ARM

手机看文章

扫描二维码
随时随地手机看文章

[导读]在嵌入式音频处理应用中，实时频谱分析是常见需求。ARM Cortex-M系列处理器结合CMSIS-DSP库，为这类应用提供了高效的计算基础。特别是在资源受限的环境中，定点数FFT优化成为平衡性能与精度的关键。本文将深入探讨如何使用CMSIS-DSP库的定点数FFT函数，实现实时音频频谱分析。

在嵌入式音频处理应用中，实时频谱分析是常见需求。ARM Cortex-M系列处理器结合CMSIS-DSP库，为这类应用提供了高效的计算基础。特别是在资源受限的环境中，定点数FFT优化成为平衡性能与精度的关键。本文将深入探讨如何使用CMSIS-DSP库的定点数FFT函数，实现实时音频频谱分析。

一、定点数FFT基础

1.1 定点数格式选择

CMSIS-DSP库支持多种定点数格式，Q15和Q31是最常用的两种。Q15格式使用16位表示，1位符号位，15位小数位；Q31格式使用32位，1位符号位，31位小数位。

// 常用定点数格式

typedef int16_t q15_t; // Q15格式：1.15

typedef int32_t q31_t; // Q31格式：1.31

typedef int64_t q63_t; // Q63格式：1.63

// 转换宏

#define Q15_TO_FLOAT(x) ((float)(x) / 32768.0f)

#define FLOAT_TO_Q15(x) ((q15_t)((x) * 32768.0f))

1.2 FFT长度选择

在音频处理中，FFT长度影响频率分辨率和计算复杂度。常见选择：

• 256点：适用于低延迟应用

• 512点：平衡延迟与分辨率

• 1024点：高分辨率分析

• 2048点：专业级分析

二、Q15定点数FFT实现

2.1 初始化FFT实例

#include "arm_math.h"

#include "arm_const_structs.h"

// 定义FFT参数

#define FFT_SIZE 256

#define FFT_COMPLEX_SIZE (FFT_SIZE * 2) // 复数数据大小

// 缓冲区声明

q15_t input_q15[FFT_SIZE]; // 输入数据

q15_t output_q15[FFT_SIZE * 2]; // 输出复数

q15_t scratch_q15[FFT_SIZE * 2]; // 临时缓冲区

// 初始化Q15 FFT

void init_fft_q15(void)

{

// 检查FFT长度是否支持

// CMSIS-DSP支持16, 32, 64, 128, 256, 512, 1024, 2048, 4096点FFT

if (FFT_SIZE == 256) {

// 使用预定义的FFT结构体

const arm_cfft_instance_q15 *S = &arm_cfft_sR_q15_len256;

} else {

// 动态创建FFT实例

arm_cfft_radix4_instance_q15 fft_instance;

arm_status status = arm_cfft_radix4_init_q15(&fft_instance,

FFT_SIZE,

0, // 正向变换

1); // 位反转

if (status != ARM_MATH_SUCCESS) {

// 处理错误

}

2.2 预处理：加窗与缩放

// 汉宁窗生成（Q15格式）

void generate_hann_window_q15(q15_t *window, uint32_t size)

{

for (uint32_t i = 0; i < size; i++) {

// 汉宁窗公式：0.5 * (1 - cos(2*π*i/(N-1)))

// 在Q15格式中实现

float32_t angle = 2.0f * 3.1415926535f * i / (size - 1);

float32_t hann = 0.5f * (1.0f - cosf(angle));

window[i] = (q15_t)(hann * 32768.0f);

}

// 应用窗函数（输入为Q15格式）

void apply_window_q15(q15_t *data, const q15_t *window, uint32_t size)

{

q31_t temp; // 使用Q31防止中间溢出

for (uint32_t i = 0; i < size; i++) {

// Q15乘法，结果需要右移15位

temp = (q31_t)data[i] * (q31_t)window[i];

data[i] = (q15_t)(temp >> 15);

}

// 自动缩放输入数据

void auto_scale_input_q15(q15_t *data, uint32_t size)

{

// 查找最大值

q15_t max_val = 0;

for (uint32_t i = 0; i < size; i++) {

q15_t abs_val = abs(data[i]);

if (abs_val > max_val) {

max_val = abs_val;

}

// 如果最大值接近饱和，进行缩放

if (max_val > 30000) { // 接近Q15最大值32768

q15_t scale_factor = 30000; // 目标最大值

for (uint32_t i = 0; i < size; i++) {

q31_t scaled = (q31_t)data[i] * (q31_t)scale_factor;

data[i] = (q15_t)((scaled / max_val) >> 15);

}

2.3 执行FFT变换

// 执行Q15 FFT

void perform_fft_q15(q15_t *input, q15_t *output,

q15_t *scratch, uint32_t size)

{

// 1. 准备复数输入（交织格式：实部，虚部，实部，虚部...）

for (uint32_t i = 0; i < size; i++) {

output[2*i] = input[i]; // 实部

output[2*i + 1] = 0; // 虚部

}

// 2. 执行FFT

arm_cfft_q15(&arm_cfft_sR_q15_len256, output, 0, 1);

// 3. 计算幅度（可选）

// 如果只需要幅度谱，可以在这里计算

}

三、Q31定点数FFT优化

3.1 Q31格式优势

Q31格式提供更高的精度，适合对动态范围要求高的应用。

// Q31 FFT实现

#define FFT_SIZE_Q31 1024

q31_t input_q31[FFT_SIZE_Q31];

q31_t output_q31[FFT_SIZE_Q31 * 2];

q31_t scratch_q31[FFT_SIZE_Q31];

// 初始化Q31 FFT

void init_fft_q31(void)

{

// 使用预定义的结构体

const arm_cfft_instance_q31 *S = &arm_cfft_sR_q31_len1024;

}

// 执行Q31 FFT

void perform_fft_q31(q31_t *input, q31_t *output,

uint32_t size, uint8_t ifftFlag,

uint8_t bitReverseFlag)

{

// 准备复数输入

for (uint32_t i = 0; i < size; i++) {

output[2*i] = input[i] >> 2; // 右移2位防止溢出

output[2*i + 1] = 0;

}

// 执行FFT

arm_cfft_q31(&arm_cfft_sR_q31_len1024, output, ifftFlag, bitReverseFlag);

}

3.2 混合精度计算

在某些情况下，可以混合使用Q15和Q31格式以获得最佳性能。

// 混合精度处理

void mixed_precision_fft(q15_t *input_q15, q31_t *output_q31,

uint32_t size)

{

// 第一步：使用Q15进行预处理

q15_t windowed_q15[size];

q15_t window_q15[size];

generate_hann_window_q15(window_q15, size);

apply_window_q15(input_q15, window_q15, size);

// 第二步：转换为Q31进行FFT

q31_t input_q31[size];

for (uint32_t i = 0; i < size; i++) {

// Q15转Q31，左移16位

input_q31[i] = ((q31_t)input_q15[i]) << 16;

}

// 第三步：执行Q31 FFT

perform_fft_q31(input_q31, output_q31, size, 0, 1);

}

四、实时音频频谱分析实现

4.1 音频采集与预处理

// 实时音频处理结构

typedef struct {

q15_t buffer[FFT_SIZE];

q15_t window[FFT_SIZE];

q15_t fft_output[FFT_SIZE * 2];

uint16_t buffer_index;

uint8_t buffer_ready;

} audio_processor_t;

// 音频采集回调

void audio_callback(q15_t *audio_data, uint16_t length,

audio_processor_t *processor)

{

for (uint16_t i = 0; i < length; i++) {

processor->buffer[processor->buffer_index++] = audio_data[i];

if (processor->buffer_index >= FFT_SIZE) {

processor->buffer_index = 0;

processor->buffer_ready = 1;

// 触发频谱分析

process_spectrum(processor);

}

// 频谱分析处理

void process_spectrum(audio_processor_t *processor)

{

// 复制缓冲区数据

q15_t temp_buffer[FFT_SIZE];

memcpy(temp_buffer, processor->buffer, FFT_SIZE * sizeof(q15_t));

// 应用窗函数

apply_window_q15(temp_buffer, processor->window, FFT_SIZE);

// 执行FFT

arm_cfft_q15(&arm_cfft_sR_q15_len256, processor->fft_output, 0, 1);

// 计算幅度谱

compute_magnitude_spectrum(processor);

}

4.2 幅度谱计算优化

// 计算幅度谱（Q15格式）

void compute_magnitude_q15(const q15_t *fft_complex,

q15_t *magnitude, uint32_t size)

{

q31_t real, imag, mag_sq;

q63_t temp; // 使用Q63防止中间溢出

for (uint32_t i = 0; i < size/2; i++) { // 只取前一半（对称）

real = (q31_t)fft_complex[2*i];

imag = (q31_t)fft_complex[2*i + 1];

// 计算平方和：real^2 + imag^2

temp = ((q63_t)real * real) + ((q63_t)imag * imag);

// 平方根近似（快速算法）

magnitude[i] = sqrt_approximation_q31((q31_t)(temp >> 15));

}

// 快速平方根近似（适用于Q31）

q15_t sqrt_approximation_q31(q31_t x)

{

q31_t y, z;

if (x <= 0) return 0;

// 初始估计

y = x >> 1; // 除以2

// 牛顿迭代（一次迭代通常足够）

for (int i = 0; i < 3; i++) {

z = x / y;

y = (y + z) >> 1;

}

// 转换为Q15

return (q15_t)(y >> 8);

}

4.3 频率分仓与能量计算

// 分频带能量计算

#define NUM_BANDS 8

const uint16_t band_limits[NUM_BANDS + 1] = {0, 2, 4, 8, 16, 32, 64, 128, 256};

void compute_band_energy(const q15_t *magnitude,

q15_t *band_energy, uint32_t fft_size)

{

uint16_t bin_index = 0;

for (uint8_t band = 0; band < NUM_BANDS; band++) {

q63_t sum = 0;

uint16_t count = 0;

// 累加当前频带的能量

for (uint16_t i = band_limits[band];

i < band_limits[band + 1] && i < fft_size/2;

i++) {

sum += (q63_t)magnitude[i] * magnitude[i];

count++;

}

if (count > 0) {

// 计算平均值并转换为Q15

q31_t avg = (q31_t)(sum / count);

band_energy[band] = (q15_t)(avg >> 15);

} else {

band_energy[band] = 0;

}

五、性能优化技巧

5.1 内存访问优化

// 使用DMA加速数据传输

void setup_audio_dma(q15_t *src, q15_t *dst, uint32_t size)

{

// 配置DMA传输

// 使用双缓冲减少内存拷贝

DMA_ConfigTypeDef dma_config = {

.src_addr = (uint32_t)src,

.dst_addr = (uint32_t)dst,

.size = size * sizeof(q15_t),

.mode = DMA_CIRCULAR,

.data_width = DMA_DATA_WIDTH_HALFWORD

};

// 启动DMA传输

HAL_DMA_Start(&hdma_memtomem, dma_config.src_addr,

dma_config.dst_addr, dma_config.size);

}

5.2 循环展开与SIMD优化

// 手动循环展开优化

void optimized_window_apply(q15_t *data, const q15_t *window, uint32_t size)

{

uint32_t i = 0;

q31_t temp1, temp2, temp3, temp4;

// 每次处理4个样本

for (; i <= size - 4; i += 4) {

temp1 = (q31_t)data[i] * (q31_t)window[i];

temp2 = (q31_t)data[i+1] * (q31_t)window[i+1];

temp3 = (q31_t)data[i+2] * (q31_t)window[i+2];

temp4 = (q31_t)data[i+3] * (q31_t)window[i+3];

data[i] = (q15_t)(temp1 >> 15);

data[i+1] = (q15_t)(temp2 >> 15);

data[i+2] = (q15_t)(temp3 >> 15);

data[i+3] = (q15_t)(temp4 >> 15);

}

// 处理剩余样本

for (; i < size; i++) {

temp1 = (q31_t)data[i] * (q31_t)window[i];

data[i] = (q15_t)(temp1 >> 15);

}

5.3 缓存友好优化

// 优化数据布局提高缓存命中率

typedef struct {

q15_t real[FFT_SIZE];

q15_t imag[FFT_SIZE];

} complex_buffer_t;

// 使用结构数组(AoS)转换为数组结构(SoA)

void convert_aos_to_soa(const q15_t *complex_aos,

complex_buffer_t *complex_soa,

uint32_t size)

{

for (uint32_t i = 0; i < size; i++) {

complex_soa->real[i] = complex_aos[2*i];

complex_soa->imag[i] = complex_aos[2*i + 1];

}

六、调试与验证

6.1 性能测量

// 测量FFT执行时间

void measure_fft_performance(void)

{

uint32_t start_cycle, end_cycle;

// 开始计时

start_cycle = DWT->CYCCNT;

// 执行FFT

arm_cfft_q15(&arm_cfft_sR_q15_len256, fft_output, 0, 1);

// 结束计时

end_cycle = DWT->CYCCNT;

uint32_t cycles = end_cycle - start_cycle;

float time_ms = (float)cycles / (SystemCoreClock / 1000.0f);

printf("FFT execution: %lu cycles, %.3f ms\n", cycles, time_ms);

}

6.2 精度验证

// 与浮点FFT结果对比

void verify_fft_accuracy(void)

{

float32_t float_input[FFT_SIZE];

float32_t float_output[FFT_SIZE * 2];

q15_t fixed_input[FFT_SIZE];

q15_t fixed_output[FFT_SIZE * 2];

// 生成测试信号

for (uint32_t i = 0; i < FFT_SIZE; i++) {

float freq = 1000.0f; // 1kHz正弦波

float_input[i] = sinf(2 * 3.1415926535f * freq * i / 16000.0f);

fixed_input[i] = FLOAT_TO_Q15(float_input[i]);

}

// 执行浮点FFT

arm_rfft_fast_instance_f32 fft_instance_f32;

arm_rfft_fast_init_f32(&fft_instance_f32, FFT_SIZE);

arm_rfft_fast_f32(&fft_instance_f32, float_input, float_output, 0);

// 执行定点FFT

arm_cfft_q15(&arm_cfft_sR_q15_len256, fixed_output, 0, 1);

// 比较结果

float max_error = 0.0f;

for (uint32_t i = 0; i < FFT_SIZE; i++) {

float float_mag = sqrtf(float_output[2*i]*float_output[2*i] +

float_output[2*i+1]*float_output[2*i+1]);

q15_t fixed_real = fixed_output[2*i];

q15_t fixed_imag = fixed_output[2*i+1];

float fixed_mag = sqrtf(Q15_TO_FLOAT(fixed_real)*Q15_TO_FLOAT(fixed_real) +

Q15_TO_FLOAT(fixed_imag)*Q15_TO_FLOAT(fixed_imag));

float error = fabsf(float_mag - fixed_mag);

if (error > max_error) {

max_error = error;

}

printf("Maximum magnitude error: %.6f\n", max_error);

}

结语

在ARM Cortex-M处理器上使用CMSIS-DSP库进行实时音频频谱分析，定点数优化是平衡性能与精度的关键。通过合理选择Q15或Q31格式、优化预处理流程、利用硬件加速特性，可以在资源受限的嵌入式系统中实现高效的频谱分析。

数字信号处理：在ARM CMSIS-DSP库中使用FFT进行实时音频频谱分析的定点数优化

Persona AI 与 Under Armour 合作，探索用于人形机器人的高性能材料

引领未来出行变革，Arm 助力中国智能汽车加速升级

FPGA资源榨取：利用DSP Slice实现高效矩阵乘法运算

数字信号与模拟信号的本质区分

FPGA中的数字信号处理：CIC滤波器与FIR滤波器的资源消耗对比

IBM宣布与Arm达成战略合作，共同塑造企业计算的未来

当“架构之源”走向“芯片实体”，Arm用AGI CPU重绘数据中心版图

Arm押注物理 AI：四大层级计算平台破解实时闭环难题

Arm第一次亲自做芯片！发布全新AGI CPU

Arm打破中立自研AGI CPU芯片，亲自下场竞逐万亿美元市场

Arm 拓展其计算平台矩阵，首次跨足芯片产品

Arm AGI CPU 重磅发布：构筑代理式 AI 云时代的芯片基石

中断风暴的终结者：ARM Cortex-M内核的优先级抢占管理与中断延迟实测分析

混合信号技术在汽车电子单芯片中的应用方案

Arm Flexible Access订阅方案为初创芯片设计企业提供“降本”新路径

JTAG技术的原理与应用

为什么ARM Cortex-M要求4字节对齐而8051不需要？

电源输出波形转方波的调整方法与实践指南

动态功率控制在抑制电流输出数模转换器过热中的应用

节能信标无线感应定位：原理、应用详解