用ffmpeg实现ECNR

发布时间 2023-09-12 08:55:04作者: 阿风小子

ECNR (Echo Cancellation and Noise Reduction) 是一种处理语音信号的技术,可以去除回声和噪声,提高通话质量。FFmpeg 是一个流行的开源多媒体框架,它可以实现对音频、视频等多种格式的编解码、过滤、转换等操作。

要使用 FFmpeg 实现 ECNR,需要先了解以下几个概念:

回声:当我们在手机或电脑上进行语音通话时,我们的声音会被录制、发送到对方的设备上,同时也会传回到自己的设备上,这就形成了回声。

噪声:环境中的杂音、电器噪声、交通噪声等都称为噪声。

ECNR 技术主要包括以下两个步骤:

回声消除:通过分析回声信号和原始信号之间的差异,估算出回声的特征(如延迟、衰减系数等),并将其从原始信号中减去,以消除回声。

噪声抑制:通过分析语音信号的频谱特征,估算出噪声的特征,然后采用滤波等方式抑制噪声,以提高语音质量。

下面是一个示例代码,展示了如何使用 FFmpeg 实现简单的 ECNR 功能。

```

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/avfilter.h>
#include <libavfilter/buffersrc.h>
#include <libavfilter/buffersink.h>
#include <libavutil/opt.h>
#include <libavutil/samplefmt.h>
#include <libavutil/channel_layout.h>
define FFARRAYELEMS(a) (sizeof(a) / sizeof((a)[0]))
typedef struct { AVFrame *frame; int delay; } EchoContext;

/* Reset an EchoContext: no reference frame captured yet; remember the
 * echo delay (in samples) for later subtraction. */
static void echo_init(EchoContext *ctx, int delay)
{
    ctx->delay = delay;
    ctx->frame = NULL;
}

/*
 * Naive echo cancellation: subtract a delayed copy of the previous frame
 * from the current frame, sample by sample.
 *
 * The original (garbled) version had three real bugs besides the mangled
 * identifiers: it av_frame_copy()'d into a frame that had no buffers, it
 * returned on the first call without ever filling out_frame, and it wrote
 * into out_frame without allocating its data planes.
 *
 * NOTE(review): indexing data[j][i] treats each sample as one byte, so this
 * is only meaningful for 8-bit planar audio (AV_SAMPLE_FMT_U8P); a real
 * implementation must dispatch on in_frame->format — confirm the decoder's
 * sample format before relying on this.
 */
static void echo_filter(AVFrame *in_frame, AVFrame *out_frame, EchoContext *ctx)
{
    if (!ctx->frame) {
        /* First frame: nothing to cancel against yet.  Save it as the echo
         * reference and pass the input through unmodified. */
        ctx->frame = av_frame_clone(in_frame);
        av_frame_ref(out_frame, in_frame);
        return;
    }

    /* (Re)allocate out_frame's buffers to match the input. */
    av_frame_unref(out_frame);
    out_frame->format         = in_frame->format;
    out_frame->channels       = in_frame->channels;
    out_frame->channel_layout = in_frame->channel_layout;
    out_frame->nb_samples     = in_frame->nb_samples;
    if (av_frame_get_buffer(out_frame, 0) < 0)
        return; /* allocation failed; leave out_frame empty */
    av_frame_copy_props(out_frame, in_frame);

    int nb_samples = in_frame->nb_samples;
    int channels   = in_frame->channels;

    for (int i = 0; i < nb_samples; i++) {
        for (int j = 0; j < channels; j++) {
            int k = i - ctx->delay; /* index of the echoing sample in the reference frame */
            if (k >= 0 && k < ctx->frame->nb_samples)
                out_frame->data[j][i] = in_frame->data[j][i] - ctx->frame->data[j][k];
            else
                out_frame->data[j][i] = in_frame->data[j][i];
        }
    }

    /* Replace the reference with the current input for the next call. */
    av_frame_free(&ctx->frame);
    ctx->frame = av_frame_clone(in_frame);
}

/* State for the naive noise suppressor: previous frame (noise reference) and the assumed noise level in dB. */
typedef struct { AVFrame *frame; double noise_level; } NoiseContext;

/* Reset a NoiseContext: no reference frame yet; remember the assumed noise
 * level in dB.  Fixes the original, which assigned to a non-existent member
 * `noiselevel` (the struct field is `noise_level`). */
static void noise_init(NoiseContext *ctx, double noise_level)
{
    ctx->frame = NULL;
    ctx->noise_level = noise_level;
}

/*
 * Naive noise suppression: estimate the noise component as a weighted mix
 * of the previous frame and the current sample, then subtract it.
 *
 * Same structural fixes as echo_filter: clone (not copy into an unallocated
 * frame) the reference, pass the first frame through instead of leaving
 * out_frame empty, and allocate out_frame's buffers before writing.
 *
 * NOTE(review): like echo_filter, this treats samples as single bytes and is
 * only meaningful for 8-bit planar audio — confirm the sample format.
 */
static void noise_filter(AVFrame *in_frame, AVFrame *out_frame, NoiseContext *ctx)
{
    if (!ctx->frame) {
        /* First frame: no noise reference yet; pass through and save it. */
        ctx->frame = av_frame_clone(in_frame);
        av_frame_ref(out_frame, in_frame);
        return;
    }

    av_frame_unref(out_frame);
    out_frame->format         = in_frame->format;
    out_frame->channels       = in_frame->channels;
    out_frame->channel_layout = in_frame->channel_layout;
    out_frame->nb_samples     = in_frame->nb_samples;
    if (av_frame_get_buffer(out_frame, 0) < 0)
        return; /* allocation failed; leave out_frame empty */
    av_frame_copy_props(out_frame, in_frame);

    int nb_samples = in_frame->nb_samples;
    int channels   = in_frame->channels;
    double scale = pow(10.0, -ctx->noise_level / 20.0); /* dB -> amplitude ratio */

    for (int i = 0; i < nb_samples; i++) {
        for (int j = 0; j < channels; j++) {
            double x = in_frame->data[j][i];
            /* Noise estimate: blend of previous frame's sample and current sample. */
            double n = scale * ctx->frame->data[j][i] + (1 - scale) * x;
            out_frame->data[j][i] = (uint8_t)(x - n);
        }
    }

    /* Current input becomes the reference for the next call. */
    av_frame_free(&ctx->frame);
    ctx->frame = av_frame_clone(in_frame);
}

int main(int argc, char **argv) { const char *input_filename = "input.mp3"; const char *outputfilename = "output.mp3"; int delay = 100; // 回声延迟(单位:采样数) double noiselevel = 40; // 噪声水平(单位:分贝) int ret;

// 打开输入文件 AVFormatContext *fmt_ctx = NULL; ret = avformat_open_input(&fmt_ctx, input_filename, NULL, NULL); if (ret < 0) {    fprintf(stderr, "Could not open input file '%s'\n", input_filename);    exit(1); } // 查找音频流信息 ret = avformat_find_stream_info(fmt_ctx, NULL); if (ret < 0) {    fprintf(stderr, "Could not find stream information\n");    exit(1); } // 查找音频流 int audio_stream_index = -1; for (int i = 0; i < fmt_ctx->nb_streams; i++) {    if (fmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {        audio_stream_index = i;        break;    } } if (audio_stream_index == -1) {    fprintf(stderr, "Could not find audio stream\n");    exit(1); } // 初始化音频解码器 AVCodec *codec = avcodec_find_decoder(fmt_ctx->streams[audio_stream_index]->codecpar->codec_id); if (!codec) {    fprintf(stderr, "Unsupported codec\n");    exit(1); } AVCodecContext *codec_ctx = avcodec_alloc_context3(codec); avcodec_parameters_to_context(codec_ctx, fmt_ctx->streams[audio_stream_index]->codecpar); ret = avcodec_open2(codec_ctx, codec, NULL); if (ret < 0) {    fprintf(stderr, "Could not open audio codec\n");    exit(1); } // 初始化音频编码器 AVCodec *out_codec = avcodec_find_encoder(AV_CODEC_ID_MP3); if (!out_codec) {    fprintf(stderr, "Unsupported output codec\n");    exit(1); } AVCodecContext *out_codec_ctx = avcodec_alloc_context3(out_codec); out_codec_ctx->sample_rate = codec_ctx->sample_rate; out_codec_ctx->channel_layout = codec_ctx->channel_layout; out_codec_ctx->channels = codec_ctx->channels; out_codec_ctx->sample_fmt = out_codec->sample_fmts[0]; out_codec_ctx->bit_rate = 128000; out_codec_ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; ret = avcodec_open2(out_codec_ctx, out_codec, NULL); if (ret < 0) {    fprintf(stderr, "Could not open output codec\n");    exit(1); } // 初始化滤波器 EchoContext echo_ctx; echo_init(&echo_ctx, delay); NoiseContext noise_ctx; noise_init(&noise_ctx, noise_level); AVFilterGraph *filter_graph = avfilter_graph_alloc(); AVFilterContext *src_ctx = 
avfilter_graph_alloc_filter(filter_graph, avfilter_get_by_name("abuffer"), "src"); av_opt_set_bin(src_ctx, "channel_layout", (uint8_t *)&codec_ctx->channel_layout, sizeof(codec_ctx->channel_layout), AV_OPT_SEARCH_CHILDREN); av_opt_set_bin(src_ctx, "sample_fmt", (uint8_t *)&codec_ctx->sample_fmt, sizeof(codec_ctx->sample_fmt), AV_OPT_SEARCH_CHILDREN); av_opt_set_q(src_ctx, "time_base", codec_ctx->time_base, AV_OPT_SEARCH_CHILDREN); av_opt_set_int(src_ctx, "sample_rate", codec_ctx->sample_rate, AV_OPT_SEARCH_CHILDREN); AVFilterContext *echo_ctx1 = avfilter_graph_alloc_filter(filter_graph, avfilter_get_by_name("aecho"), "echo1"); av_opt_set("in_gain", "0.6", 0, echo_ctx1); av_opt_set("out_gain", "0.1", 0, echo_ctx1); av_opt_set("delays", "100|100", 0, echo_ctx1); AVFilterContext *noise_ctx1 = avfilter_graph_alloc_filter(filter_graph, avfilter_get_by_name("anoisesrc"), "noise1"); av_opt_set("a", "20*log10(0.01)", 0, noise_ctx1); av_opt_set("d", "0.5", 0, noise_ctx1); AVFilterContext *noise_ctx2 = avfilter_graph_alloc_filter(filter_graph, avfilter_get_by_name("anoise"), "noise2"); av_opt_set("c", "all", 0, noise_ctx2); AVFilterContext *sink_ctx = avfilter_graph_alloc_filter(filter_graph, avfilter_get_by_name("abuffersink"), "sink"); av_opt_set_bin(sink_ctx, "channel_layout", (uint8_t *)&out_codec_ctx->channel_layout, sizeof(out_codec_ctx->channel_layout), AV_OPT_SEARCH_CHILDREN); av_opt_set_bin(sink_ctx, "sample_fmt", (uint8_t *)&out_codec_ctx->sample_fmt, sizeof(out_codec_ctx->sample_fmt), AV_OPT_SEARCH_CHILDREN); av_opt_set_q(sink_ctx, "time_base", out_codec_ctx->time_base, AV_OPT_SEARCH_CHILDREN); ret = avfilter_link(src_ctx, 0, echo_ctx1, 0); if (ret < 0) {    fprintf(stderr, "Could not link abuffer -> aecho\n");    exit(1); } ret = avfilter_link(echo_ctx1, 0, noise_ctx2, 0); if (ret < 0) {    fprintf(stderr, "Could not link aecho -> anoise\n");    exit(1); } ret = avfilter_link(noise_ctx2, 0, sink_ctx, 0); if (ret < 0) {    fprintf(stderr, "Could not link anoise -> 
abuffersink\n");    exit(1); } ret = avfilter_graph_config(filter_graph, NULL); if (ret < 0) {    fprintf(stderr, "Could not configure filter graph\n");    exit(1); } // 初始化输入输出数据包 AVPacket *pkt = av_packet_alloc(); AVFrame *in_frame = av_frame_alloc(); AVFrame *echo_frame = av_frame_alloc(); AVFrame *noise_frame = av_frame_alloc(); AVFrame *out_frame = av_frame_alloc(); if (!pkt || !in_frame || !echo_frame || !noise_frame || !out_frame) {    fprintf(stderr, "Failed to allocate packet/frame\n");    exit(1); } // 循环读取音频帧 while (1) {    ret = av_read_frame(fmt_ctx, pkt);    if (ret < 0)        break;    if (pkt->stream_index != audio_stream_index) {        av_packet_unref(pkt);        continue;    }    ret = avcodec_send_packet(codec_ctx, pkt);    if (ret < 0) {        fprintf(stderr, "Error sending a packet for decoding\n");        exit(1);    }    while (ret >= 0) {        ret = avcodec_receive_frame(codec_ctx, in_frame);        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)            break;        if (ret < 0) {            fprintf(stderr, "Error during decoding\n");            exit(1);        }        echo_filter(in_frame, echo_frame, &echo_ctx);        noise_filter(echo_frame, noise_frame, &noise_ctx);        av_frame_copy_props(out_frame, noise_frame);        ret = av_buffersrc_add_frame_flags(src_ctx, noise_frame, 0);        if (ret < 0) {            fprintf(stderr, "Error submitting the frame to the filtergraph\n");            exit(1);        }        while (1) {            ret = av_buffersink_get_frame(sink_ctx, out_frame);            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)                break;            if (ret < 0) {                fprintf(stderr, "Error during filtering\n");                exit(1);            }            out_frame->pts = av_rescale_q(out_frame->pts, out_codec_ctx->time_base, codec_ctx->time_base);            out_frame->pkt_dts = av_rescale_q(out_frame->pkt_dts, out_codec_ctx->time_base, codec_ctx->time_base);            
ret = avcodec_send_frame(out_codec_ctx, out_frame);            if (ret < 0) {                fprintf(stderr, "Error sending a frame for encoding\n");                exit(1);            }            while (ret >= 0) {                ret = avcodec_receive_packet(out_codec_ctx, pkt);                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)                    break;                if (ret < 0) {                    fprintf(stderr, "Error during encoding\n");                    exit(1);                }                pkt->stream_index = 0;                av_interleaved_write_frame(fmt_ctx, pkt);                av_packet_unref(pkt);            }            av_frame_unref(out_frame);        }    }    av_packet_unref(pkt); } // 清理资源 av_write_trailer(fmt_ctx); avcodec_free_context(&codec_ctx); avcodec_free_context(&out_codec_ctx); avformat_close_input(&fmt_ctx); avformat_free_context(fmt_ctx); av_frame_free(&in_frame); av_frame_free(&echo_frame); av_frame_free(&noise_frame); av_frame_free(&out_frame); av_packet_free(&pkt); avfilter_graph_free(&filter_graph); return 0;

} ```

这段代码主要实现了以下功能:

使用 FFmpeg 打开输入音频文件,并查找音频流信息和音频解码器;

初始化音频编码器,设置输出参数;

初始化滤波器,包括回声消除和噪声抑制两个部分;

循环读取音频帧,依次经过回声消除、噪声抑制、编码等处理,最终输出到目标文件中。

需要注意的是,这段代码只是一个示例,实际应用中可能需要根据具体需求进行调整。例如,可以根据实际情况调整回声延迟和噪声水平等参数,以获得更好的效果。 作者:零声教育诗诗 https://www.bilibili.com/read/cv23277584/ 出处:bilibili