用ffmpeg实现ECNR

发布时间 2023-09-12 08:55:04作者: 阿风小子

ECNR (Echo Cancellation and Noise Reduction) 是一种处理语音信号的技术,可以去除回声和噪声,提高通话质量。FFmpeg 是一个流行的开源多媒体框架,它可以实现对音频、视频等多种格式的编解码、过滤、转换等操作。

要使用 FFmpeg 实现 ECNR,需要先了解以下几个概念:

回声:当我们在手机或电脑上进行语音通话时,我们的声音会被录制、发送到对方的设备上,同时也会传回到自己的设备上,这就形成了回声。

噪声:环境中的杂音、电器噪声、交通噪声等都称为噪声。

ECNR 技术主要包括以下两个步骤:

回声消除:通过分析回声信号和原始信号之间的差异,估算出回声的特征(如延迟、衰减系数等),并将其从原始信号中减去,以消除回声。

噪声抑制:通过分析语音信号的频谱特征,估算出噪声的特征,然后采用滤波等方式抑制噪声,以提高语音质量。

下面是一个示例代码,展示了如何使用 FFmpeg 实现简单的 ECNR 功能。

```

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/avfilter.h>
#include <libavfilter/buffersrc.h>
#include <libavfilter/buffersink.h>
#include <libavutil/opt.h>
#include <libavutil/samplefmt.h>
#include <libavutil/channel_layout.h>
define FFARRAYELEMS(a) (sizeof(a) / sizeof((a)[0]))
typedef struct { AVFrame *frame; int delay; } EchoContext;

/* Reset an EchoContext: no reference frame captured yet; remember the
 * echo delay (in samples) for later subtraction. */
static void echo_init(EchoContext *ctx, int delay)
{
    ctx->delay = delay;
    ctx->frame = NULL;
}

/*
 * Naive echo cancellation: subtract a delayed copy of the previous frame
 * from the current frame, sample by sample.
 *
 * The original (garbled) version had three real bugs besides the mangled
 * identifiers: it av_frame_copy()'d into a frame that had no buffers, it
 * returned on the first call without ever filling out_frame, and it wrote
 * into out_frame without allocating its data planes.
 *
 * NOTE(review): indexing data[j][i] treats each sample as one byte, so this
 * is only meaningful for 8-bit planar audio (AV_SAMPLE_FMT_U8P); a real
 * implementation must dispatch on in_frame->format — confirm the decoder's
 * sample format before relying on this.
 */
static void echo_filter(AVFrame *in_frame, AVFrame *out_frame, EchoContext *ctx)
{
    if (!ctx->frame) {
        /* First frame: nothing to cancel against yet.  Save it as the echo
         * reference and pass the input through unmodified. */
        ctx->frame = av_frame_clone(in_frame);
        av_frame_ref(out_frame, in_frame);
        return;
    }

    /* (Re)allocate out_frame's buffers to match the input. */
    av_frame_unref(out_frame);
    out_frame->format         = in_frame->format;
    out_frame->channels       = in_frame->channels;
    out_frame->channel_layout = in_frame->channel_layout;
    out_frame->nb_samples     = in_frame->nb_samples;
    if (av_frame_get_buffer(out_frame, 0) < 0)
        return; /* allocation failed; leave out_frame empty */
    av_frame_copy_props(out_frame, in_frame);

    int nb_samples = in_frame->nb_samples;
    int channels   = in_frame->channels;

    for (int i = 0; i < nb_samples; i++) {
        for (int j = 0; j < channels; j++) {
            int k = i - ctx->delay; /* index of the echoing sample in the reference frame */
            if (k >= 0 && k < ctx->frame->nb_samples)
                out_frame->data[j][i] = in_frame->data[j][i] - ctx->frame->data[j][k];
            else
                out_frame->data[j][i] = in_frame->data[j][i];
        }
    }

    /* Replace the reference with the current input for the next call. */
    av_frame_free(&ctx->frame);
    ctx->frame = av_frame_clone(in_frame);
}

/* State for the naive noise suppressor: previous frame (noise reference) and the assumed noise level in dB. */
typedef struct { AVFrame *frame; double noise_level; } NoiseContext;

/* Reset a NoiseContext: no reference frame yet; remember the assumed noise
 * level in dB.  Fixes the original, which assigned to a non-existent member
 * `noiselevel` (the struct field is `noise_level`). */
static void noise_init(NoiseContext *ctx, double noise_level)
{
    ctx->frame = NULL;
    ctx->noise_level = noise_level;
}

/*
 * Naive noise suppression: estimate the noise component as a weighted mix
 * of the previous frame and the current sample, then subtract it.
 *
 * Same structural fixes as echo_filter: clone (not copy into an unallocated
 * frame) the reference, pass the first frame through instead of leaving
 * out_frame empty, and allocate out_frame's buffers before writing.
 *
 * NOTE(review): like echo_filter, this treats samples as single bytes and is
 * only meaningful for 8-bit planar audio — confirm the sample format.
 */
static void noise_filter(AVFrame *in_frame, AVFrame *out_frame, NoiseContext *ctx)
{
    if (!ctx->frame) {
        /* First frame: no noise reference yet; pass through and save it. */
        ctx->frame = av_frame_clone(in_frame);
        av_frame_ref(out_frame, in_frame);
        return;
    }

    av_frame_unref(out_frame);
    out_frame->format         = in_frame->format;
    out_frame->channels       = in_frame->channels;
    out_frame->channel_layout = in_frame->channel_layout;
    out_frame->nb_samples     = in_frame->nb_samples;
    if (av_frame_get_buffer(out_frame, 0) < 0)
        return; /* allocation failed; leave out_frame empty */
    av_frame_copy_props(out_frame, in_frame);

    int nb_samples = in_frame->nb_samples;
    int channels   = in_frame->channels;
    double scale = pow(10.0, -ctx->noise_level / 20.0); /* dB -> amplitude ratio */

    for (int i = 0; i < nb_samples; i++) {
        for (int j = 0; j < channels; j++) {
            double x = in_frame->data[j][i];
            /* Noise estimate: blend of previous frame's sample and current sample. */
            double n = scale * ctx->frame->data[j][i] + (1 - scale) * x;
            out_frame->data[j][i] = (uint8_t)(x - n);
        }
    }

    /* Current input becomes the reference for the next call. */
    av_frame_free(&ctx->frame);
    ctx->frame = av_frame_clone(in_frame);
}

int main(int argc, char **argv) { const char *input_filename = "input.mp3"; const char *outputfilename = "output.mp3"; int delay = 100; // 回声延迟(单位:采样数) double noiselevel = 40; // 噪声水平(单位:分贝) int ret;

// 打开输入文件 AVFormatContext *fmt_ctx = NULL; ret = avformat_open_input(&fmt_ctx, input_filename, NULL, NULL); if (ret < 0) {    fprintf(stderr, "Could not open input file '%s'\n", input_filename);    exit(1); } // 查找音频流信息 ret = avformat_find_stream_info(fmt_ctx, NULL); if (ret < 0) {    fprintf(stderr, "Could not find stream information\n");    exit(1); } // 查找音频流 int audio_stream_index = -1; for (int i = 0; i < fmt_ctx->nb_streams; i++) {    if (fmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {        audio_stream_index = i;        break;    } } if (audio_stream_index == -1) {    fprintf(stderr, "Could not find audio stream\n");    exit(1); } // 初始化音频解码器 AVCodec *codec = avcodec_find_decoder(fmt_ctx->streams[audio_stream_index]->codecpar->codec_id); if (!codec) {    fprintf(stderr, "Unsupported codec\n");    exit(1); } AVCodecContext *codec_ctx = avcodec_alloc_context3(codec); avcodec_parameters_to_context(codec_ctx, fmt_ctx->streams[audio_stream_index]->codecpar); ret = avcodec_open2(codec_ctx, codec, NULL); if (ret < 0) {    fprintf(stderr, "Could not open audio codec\n");    exit(1); } // 初始化音频编码器 AVCodec *out_codec = avcodec_find_encoder(AV_CODEC_ID_MP3); if (!out_codec) {    fprintf(stderr, "Unsupported output codec\n");    exit(1); } AVCodecContext *out_codec_ctx = avcodec_alloc_context3(out_codec); out_codec_ctx->sample_rate = codec_ctx->sample_rate; out_codec_ctx->channel_layout = codec_ctx->channel_layout; out_codec_ctx->channels = codec_ctx->channels; out_codec_ctx->sample_fmt = out_codec->sample_fmts[0]; out_codec_ctx->bit_rate = 128000; out_codec_ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; ret = avcodec_open2(out_codec_ctx, out_codec, NULL); if (ret < 0) {    fprintf(stderr, "Could not open output codec\n");    exit(1); } // 初始化滤波器 EchoContext echo_ctx; echo_init(&echo_ctx, delay); NoiseContext noise_ctx; noise_init(&noise_ctx, noise_level); AVFilterGraph *filter_graph = avfilter_graph_alloc(); AVFilterContext *src_ctx = 
avfilter_graph_alloc_filter(filter_graph, avfilter_get_by_name("abuffer"), "src"); av_opt_set_bin(src_ctx, "channel_layout", (uint8_t *)&codec_ctx->channel_layout, sizeof(codec_ctx->channel_layout), AV_OPT_SEARCH_CHILDREN); av_opt_set_bin(src_ctx, "sample_fmt", (uint8_t *)&codec_ctx->sample_fmt, sizeof(codec_ctx->sample_fmt), AV_OPT_SEARCH_CHILDREN); av_opt_set_q(src_ctx, "time_base", codec_ctx->time_base, AV_OPT_SEARCH_CHILDREN); av_opt_set_int(src_ctx, "sample_rate", codec_ctx->sample_rate, AV_OPT_SEARCH_CHILDREN); AVFilterContext *echo_ctx1 = avfilter_graph_alloc_filter(filter_graph, avfilter_get_by_name("aecho"), "echo1"); av_opt_set("in_gain", "0.6", 0, echo_ctx1); av_opt_set("out_gain", "0.1", 0, echo_ctx1); av_opt_set("delays", "100|100", 0, echo_ctx1); AVFilterContext *noise_ctx1 = avfilter_graph_alloc_filter(filter_graph, avfilter_get_by_name("anoisesrc"), "noise1"); av_opt_set("a", "20*log10(0.01)", 0, noise_ctx1); av_opt_set("d", "0.5", 0, noise_ctx1); AVFilterContext *noise_ctx2 = avfilter_graph_alloc_filter(filter_graph, avfilter_get_by_name("anoise"), "noise2"); av_opt_set("c", "all", 0, noise_ctx2); AVFilterContext *sink_ctx = avfilter_graph_alloc_filter(filter_graph, avfilter_get_by_name("abuffersink"), "sink"); av_opt_set_bin(sink_ctx, "channel_layout", (uint8_t *)&out_codec_ctx->channel_layout, sizeof(out_codec_ctx->channel_layout), AV_OPT_SEARCH_CHILDREN); av_opt_set_bin(sink_ctx, "sample_fmt", (uint8_t *)&out_codec_ctx->sample_fmt, sizeof(out_codec_ctx->sample_fmt), AV_OPT_SEARCH_CHILDREN); av_opt_set_q(sink_ctx, "time_base", out_codec_ctx->time_base, AV_OPT_SEARCH_CHILDREN); ret = avfilter_link(src_ctx, 0, echo_ctx1, 0); if (ret < 0) {    fprintf(stderr, "Could not link abuffer -> aecho\n");    exit(1); } ret = avfilter_link(echo_ctx1, 0, noise_ctx2, 0); if (ret < 0) {    fprintf(stderr, "Could not link aecho -> anoise\n");    exit(1); } ret = avfilter_link(noise_ctx2, 0, sink_ctx, 0); if (ret < 0) {    fprintf(stderr, "Could not link anoise -> 
abuffersink\n");    exit(1); } ret = avfilter_graph_config(filter_graph, NULL); if (ret < 0) {    fprintf(stderr, "Could not configure filter graph\n");    exit(1); } // 初始化输入输出数据包 AVPacket *pkt = av_packet_alloc(); AVFrame *in_frame = av_frame_alloc(); AVFrame *echo_frame = av_frame_alloc(); AVFrame *noise_frame = av_frame_alloc(); AVFrame *out_frame = av_frame_alloc(); if (!pkt || !in_frame || !echo_frame || !noise_frame || !out_frame) {    fprintf(stderr, "Failed to allocate packet/frame\n");    exit(1); } // 循环读取音频帧 while (1) {    ret = av_read_frame(fmt_ctx, pkt);    if (ret < 0)        break;    if (pkt->stream_index != audio_stream_index) {        av_packet_unref(pkt);        continue;    }    ret = avcodec_send_packet(codec_ctx, pkt);    if (ret < 0) {        fprintf(stderr, "Error sending a packet for decoding\n");        exit(1);    }    while (ret >= 0) {        ret = avcodec_receive_frame(codec_ctx, in_frame);        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)            break;        if (ret < 0) {            fprintf(stderr, "Error during decoding\n");            exit(1);        }        echo_filter(in_frame, echo_frame, &echo_ctx);        noise_filter(echo_frame, noise_frame, &noise_ctx);        av_frame_copy_props(out_frame, noise_frame);        ret = av_buffersrc_add_frame_flags(src_ctx, noise_frame, 0);        if (ret < 0) {            fprintf(stderr, "Error submitting the frame to the filtergraph\n");            exit(1);        }        while (1) {            ret = av_buffersink_get_frame(sink_ctx, out_frame);            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)                break;            if (ret < 0) {                fprintf(stderr, "Error during filtering\n");                exit(1);            }            out_frame->pts = av_rescale_q(out_frame->pts, out_codec_ctx->time_base, codec_ctx->time_base);            out_frame->pkt_dts = av_rescale_q(out_frame->pkt_dts, out_codec_ctx->time_base, codec_ctx->time_base);            
ret = avcodec_send_frame(out_codec_ctx, out_frame);            if (ret < 0) {                fprintf(stderr, "Error sending a frame for encoding\n");                exit(1);            }            while (ret >= 0) {                ret = avcodec_receive_packet(out_codec_ctx, pkt);                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)                    break;                if (ret < 0) {                    fprintf(stderr, "Error during encoding\n");                    exit(1);                }                pkt->stream_index = 0;                av_interleaved_write_frame(fmt_ctx, pkt);                av_packet_unref(pkt);            }            av_frame_unref(out_frame);        }    }    av_packet_unref(pkt); } // 清理资源 av_write_trailer(fmt_ctx); avcodec_free_context(&codec_ctx); avcodec_free_context(&out_codec_ctx); avformat_close_input(&fmt_ctx); avformat_free_context(fmt_ctx); av_frame_free(&in_frame); av_frame_free(&echo_frame); av_frame_free(&noise_frame); av_frame_free(&out_frame); av_packet_free(&pkt); avfilter_graph_free(&filter_graph); return 0;

} ```

这段代码主要实现了以下功能:

使用 FFmpeg 打开输入音频文件,并查找音频流信息和音频解码器;

初始化音频编码器,设置输出参数;

初始化滤波器,包括回声消除和噪声抑制两个部分;

循环读取音频帧,依次经过回声消除、噪声抑制、编码等处理,最终输出到目标文件中。

需要注意的是,这段代码只是一个示例,实际应用中可能需要根据具体需求进行调整。例如,可以根据实际情况调整回声延迟和噪声水平等参数,以获得更好的效果。 作者:零声教育诗诗 https://www.bilibili.com/read/cv23277584/ 出处:bilibili