简单记录下python视频提取语音，语音转文字（web版本）-526互联

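一、直接贴代码。运行前需要先准备好离线文件（例如 Vosk 语音识别模型，代码中通过 Model("model") 从 model 目录加载），安装 ffmpeg（代码通过命令行调用它），并安装代码中用到的 python 依赖包（flask、flask_cors、pydub、noisereduce、vosk、gevent）。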

# coding=utf-8
from flask import Flask, render_template_string, jsonify, request
from flask_cors import CORS
from tkinter import filedialog
from pydub import AudioSegment
from noisereduce import reduce_noise
from vosk import Model, KaldiRecognizer
from gevent import pywsgi
import os
import shutil
import wave
import subprocess
import webbrowser
import math
import datetime

# Flask application object; the route decorators in this file register on it.
app = Flask(__name__)
# app.config['DEBUG'] = True
# Enable CORS on the app with flask_cors' default settings.
CORS(app)
# CORS(app, resources=r'/*')      # work around cross-origin restrictions
# cors = CORS(app, resources={r"/api/*": {"origins": "*"}})   # work around cross-origin restrictions


# Module-level state shared by the functions in this file.
# Directory that contains this script. abspath() is applied first because
# __file__ may be a bare relative name, in which case dirname() alone returns
# '' and every path derived from it would become root-relative (e.g. "/log").
base_path = os.path.dirname(os.path.abspath(__file__))
video_file_path = None  # path of the selected video file
audio_path = None  # directory for extracted audio
audio_file_path = None  # path of the extracted audio file
audio_path_jz = None  # directory for denoised audio
audio_file_path_jz = None  # path of the denoised audio file
audio_path_split = None  # directory for audio segments
txt_path = None  # directory where recognition results are saved
file_name = None  # file name of the selected video (without extension)
file_suffix = None  # file extension of the selected video
text = ''     # recognized text
temp_list = []      # list of audio segment files
logPath = f"{base_path}/log"  # directory for the daily log files


@app.route('/')
def index():
    """Serve the front-end page.

    Reads ``html/index.html`` under ``base_path`` as UTF-8 and renders its
    content with ``render_template_string``.
    """
    # Context manager so the file handle is closed as soon as it has been read.
    with open(f'{base_path}/html/index.html', encoding='utf-8') as page:
        return render_template_string(page.read())


# Derive the other working paths from the selected video path
def initData():
    """Reset per-video state and compute the working paths for the selected video.

    Reads the module-level ``video_file_path`` and ``base_path``, then assigns
    the audio, denoised-audio, audio-segment and text output locations together
    with ``file_name`` and ``file_suffix``. ``temp_list`` is cleared. Every
    computed value is written to the log.
    """
    global video_file_path, audio_path, audio_file_path, audio_path_jz, audio_file_path_jz, audio_path_split, \
        file_name, txt_path, file_suffix, temp_list
    temp_list = []
    writeLog('开始初始化参数。。。。。。')
    writeLog('--------------------------------------------------------------------------------')
    audio_path = f"{base_path}/files/audio"
    audio_path_jz = f"{base_path}/files/jiangzao"
    audio_path_split = f"{base_path}/files/audiosplit"
    # splitext() splits on the last dot only: "a.b.mp4" keeps the name "a.b"
    # (splitting on every dot would truncate it to "a"), and a name without an
    # extension yields an empty suffix instead of raising IndexError.
    file_name, dotted_suffix = os.path.splitext(os.path.basename(str(video_file_path)))
    file_suffix = dotted_suffix[1:]  # drop the leading "."
    audio_file_path = f"{audio_path}/{file_name}.wav"
    audio_file_path_jz = f"{audio_path_jz}/{file_name}.wav"
    txt_path = f"{base_path}/files/text"
    writeLog(f'音频路径：{audio_path}')
    writeLog(f'音频降噪路径： {audio_path_jz}')
    writeLog(f'音频切割文件路径： {audio_path_split}')
    writeLog(f'文件名称： {file_name}')
    writeLog(f'文件后缀： {file_suffix}')
    writeLog(f'音频文件路径： {audio_file_path}')
    writeLog(f'音频降噪文件路径： {audio_file_path_jz}')
    writeLog(f'文本和html保存路径： {txt_path}')


# Delete the given directory or file
def deleteDirOrFile(path):
    """Remove ``path``, whether it is a directory tree or a single file.

    Nothing is removed when ``path`` is neither an existing directory nor an
    existing file. The attempt and any successful removal are logged.
    """
    writeLog(f'正在删除：{path}')
    if os.path.isdir(path):
        shutil.rmtree(path)
        writeLog(f'删除目录成功：{path}')
        return
    if os.path.isfile(path):
        os.remove(path)
        writeLog(f'删除文件成功：{path}')


# List the files in a folder that have the given extension
def get_files_in_directory(path, targetSuffix):
    """Return ``"{path}/{name}"`` for every non-directory entry of ``path``
    whose text after the last dot equals ``targetSuffix`` (case-insensitive).

    Entries are reported in ``os.listdir`` order; sub-directories are skipped.
    """
    return [
        f"{path}/{entry}"
        for entry in os.listdir(path)
        if not os.path.isdir(f"{path}/{entry}")
        and entry.rsplit(".", 1)[-1].upper() == targetSuffix.upper()
    ]


# Create the working directories
def initDir():
    """Create the audio, denoised-audio, audio-segment and text directories.

    Uses the module-level ``audio_path``, ``audio_path_jz``,
    ``audio_path_split`` and ``txt_path``. Directories that already exist are
    left untouched.
    """
    for directory in (audio_path, audio_path_jz, audio_path_split, txt_path):
        if directory is None:
            # These globals start as None and are only assigned by initData();
            # str(None) would otherwise create a directory literally named "None".
            continue
        os.makedirs(str(directory), exist_ok=True)


# Choose the video
@app.route('/api/choose/moviePath', methods=['GET', 'POST'])
def chooseMoviePath():
    """Open a file dialog, remember the chosen video and initialise the paths.

    Returns:
        The chosen path as a JSON string, or an empty JSON string when the
        dialog was cancelled.
    """
    global video_file_path
    # askopenfilename() returns only the path. askopenfile() would also open
    # the file, and that handle was never closed.
    videoPath = filedialog.askopenfilename()
    if not videoPath:
        # A view must return a response; a bare `return` makes Flask raise.
        # An empty string is falsy on the client, which then leaves the path unchanged.
        return jsonify('')
    video_file_path = videoPath
    writeLog(f'选择的视频目录为{video_file_path}')
    initData()
    return jsonify(videoPath)


# 选择解析文本保存路径
# @app.route('/api/choose/textPath', methods=['GET'])
# def chooseTextPath():
#     global txt_path
#     txt_path = filedialog.askdirectory()
#     if not txt_path:
#         return
#     return jsonify(txt_path)


# Extract audio from the video
@app.route('/api/trans/videoToAudio', methods=['POST'])
def videoToAudio():
    """Run ffmpeg to convert ``video_file_path`` into ``audio_file_path``.

    Clears ``temp_list``, makes sure the working directories exist and removes
    any previous output file before invoking ffmpeg.

    Returns:
        JSON ``{"code": 1000, ...}`` on success, or
        ``{"code": 2000, ..., "e": <error>}`` when a step raises or ffmpeg
        exits with a non-zero status.
    """
    try:
        global temp_list
        temp_list = []
        initDir()
        deleteDirOrFile(audio_file_path)        # overwrite: remove the old file before writing
        writeLog('开始从视频提取音频文件。。。。。。')
        exit_code = subprocess.call(['ffmpeg', '-i', video_file_path, audio_file_path])
        if exit_code != 0:
            # subprocess.call() does not raise on failure; without this check a
            # failed conversion would be reported to the client as success.
            raise RuntimeError(f'ffmpeg exited with code {exit_code}')
    except Exception as e:
        writeLog(f'视频提取音频失败！，错误信息：\n {e}')
        return jsonify({"code": 2000, "text": '视频提取音频失败', "e": str(e)})
    writeLog('视频提取音频完成！')
    return jsonify({"code": 1000, "text": '视频提取音频完成!'})


# Audio noise reduction
@app.route('/api/trans/noiseReduce', methods=['POST'])
def noiseReduce():
    """Denoise the extracted WAV and export the result.

    Loads ``audio_file_path`` with pydub, passes its samples and frame rate to
    ``reduce_noise``, rebuilds an ``AudioSegment`` with the source's frame
    rate, sample width and channel count, and exports it as WAV to
    ``audio_file_path_jz``.

    Returns:
        JSON ``{"code": 1000, ...}`` on success, or
        ``{"code": 2000, ..., "e": <error>}`` when any step raises.
    """
    try:
        initDir()
        writeLog('开始音频降噪......')
        audio = AudioSegment.from_wav(audio_file_path)
        # NOTE(review): the samples are handed over as one flat sequence;
        # presumably fine for mono input — confirm the result for multi-channel audio.
        audio_array = audio.get_array_of_samples()
        reduced_noise = reduce_noise(audio_array, audio.frame_rate)
        reduced_audio = AudioSegment(
            reduced_noise.tobytes(),
            frame_rate=audio.frame_rate,
            sample_width=audio.sample_width,
            channels=audio.channels
        )
        deleteDirOrFile(audio_file_path_jz)  # overwrite: remove the old file before writing
        reduced_audio.export(audio_file_path_jz, format="wav")
    except Exception as e:
        # The log message previously contained a typo ("将找" instead of "降噪").
        writeLog(f'音频降噪失败! 错误信息：{e}')
        return jsonify({"code": 2000, "text": '音频降噪失败', "e": str(e)})
    writeLog('音频降噪完成！')
    return jsonify({"code": 1000, "text": '音频降噪完成!'})


# Split the audio into segments
@app.route('/api/trans/audioSplit', methods=['POST'])
def audioSplit():
    """Cut the denoised WAV into consecutive 60-second segment files.

    Reads ``audio_file_path_jz`` and writes ``{file_name}_{i}.wav`` files into
    ``audio_path_split``, copying channel count, sample width and frame rate
    from the source. The segment paths are stored, in order, in ``temp_list``.

    Returns:
        JSON ``{"code": 1000, ...}`` on success, or
        ``{"code": 2000, ..., "e": <error>}`` when any step raises.
    """
    global temp_list
    temp_list = []
    try:
        initDir()
        writeLog('开始音频切片.......')
        # Context managers close the reader and every writer even when a step fails.
        with wave.open(str(audio_file_path_jz), "rb") as wf:
            nchannels = wf.getnchannels()
            sampwidth = wf.getsampwidth()
            framerate = wf.getframerate()  # frame rate
            nframes = wf.getnframes()  # total number of audio frames

            duration = nframes / framerate
            print("音频文件时长：%.2fs" % duration)

            length = 60 * framerate  # frames per segment: 60 seconds
            start = 0
            count = math.ceil(nframes / length)

            for i in range(count):
                # Read one segment
                wf.setpos(start)
                data = wf.readframes(length)

                # Build the path with a plain f-string: applying "%d" % i to an
                # f-string result breaks when file_name itself contains "%".
                segment_path = f"{audio_path_split}/{file_name}_{i}.wav"
                temp_list.append(segment_path)
                deleteDirOrFile(segment_path)  # overwrite: remove the old file before writing
                with wave.open(segment_path, "wb") as new_wf:
                    new_wf.setnchannels(nchannels)
                    new_wf.setsampwidth(sampwidth)
                    new_wf.setframerate(framerate)
                    new_wf.writeframes(data)

                # Advance to the next segment
                start += length
    except Exception as e:
        writeLog(f'音频切片失败！错误信息{e}')
        return jsonify({"code": 2000, "text": '音频分片失败', "e": str(e)})
    writeLog('音频切片完成!')
    return jsonify({"code": 1000, "text": '音频分片完成!'})


# Speech to text
@app.route('/api/trans/audioToText', methods=['POST'])
def audioToText():
    """Recognize the speech in every segment listed in ``temp_list``.

    Each segment is converted by ffmpeg to 16 kHz mono signed 16-bit PCM and
    fed to a Vosk ``KaldiRecognizer``. The recognized text (spaces removed) is
    appended to ``{txt_path}/{file_name}.txt`` and accumulated in the
    module-level ``text``.

    Returns:
        JSON ``{"code": 1000, ...}`` on success, or
        ``{"code": 2000, ..., "e": <error>}`` when there are no segments or
        any step raises.
    """
    global text, temp_list
    import json  # local import: only this handler needs it

    text = ''
    try:
        initDir()
        sample_rate = 16000
        if not temp_list:
            # A view must return a response; a bare `return` makes Flask raise.
            writeLog('语音提取文字失败！没有可识别的音频分片')
            return jsonify({"code": 2000, "text": '音频提取文字失败', "e": '没有可识别的音频分片'})
        output_path = f"{txt_path}/{file_name}.txt"
        deleteDirOrFile(output_path)  # overwrite: remove the old file before writing
        # Load the model once; it does not depend on the segment being processed.
        model = Model("model")
        # Context manager so the output file is closed even if a segment fails.
        with open(output_path, "a", encoding="utf-8") as f:
            writeLog('开始语音提取文字......')
            for file in temp_list:
                rec = KaldiRecognizer(model, sample_rate)
                # run() waits for ffmpeg to exit, so no unreaped child is left behind.
                completed = subprocess.run(
                    ['ffmpeg', '-loglevel', 'quiet', '-i', file, '-ar', str(sample_rate),
                     '-ac', '1', '-f', 's16le', '-'],
                    stdout=subprocess.PIPE)
                data = completed.stdout
                if len(data) == 0:
                    continue
                rec.AcceptWaveform(data)
                # NOTE(review): the partial result is read after feeding the whole
                # segment; confirm whether FinalResult() would be more appropriate.
                # The recognizer returns JSON, so parse it as JSON instead of eval().
                dictionary = json.loads(rec.PartialResult())
                txt = dictionary['partial'].replace(' ', '')
                if txt == '':
                    continue
                f.write(txt)
                text += txt
                writeLog(f'语音提取文字{file}完成!')
                print(f"语音提取文字{file}完成")
    except Exception as e:
        writeLog(f'语音提取文字失败！错误信息{e}')
        return jsonify({"code": 2000, "text": '音频提取文字失败', "e": str(e)})
    writeLog('语音提取文字全部完成!')
    print('语音提取文字完成')
    return jsonify({"code": 1000, "text": "语音提取文字完成!"})


# Highlight the keywords in the recognized text (bold red)
@app.route('/api/trans/matchKeywords', methods=['POST'])
def matchKeywords():
    """Write the recognized text as HTML with every keyword highlighted.

    Keywords come from the ``keywords`` query parameter, separated by ``|``.
    The module-level ``text`` (with ``[unk]`` markers removed) is written to
    ``{txt_path}/{file_name}.html``; each keyword occurrence is wrapped in a
    styled ``<span>``.

    Returns:
        JSON ``{"code": 1000, ..., "relativeHtmlPath": ...}`` on success, or
        ``{"code": 2000, ...}`` when there is no recognized text or no keyword.
    """
    import re
    from html import escape

    writeLog('开始匹配关键字......')
    # `text` starts as '' and is never None, so test for emptiness.
    if not text:
        writeLog('关键字匹配失败！语音识别文件为空！')
        # Reply with a JSON object so the client can read res.code / res.text.
        return jsonify({"code": 2000, "text": '音频识别文件为空！'})
    txt = text.replace('[unk]', '')
    writeLog(f'识别的文本为： {txt}')
    # The parameter may be absent; treat that the same as an empty value.
    keywords = request.args.get('keywords') or ''
    # Drop empty entries: replacing '' would wrap every single character.
    keywordList = [key for key in keywords.split("|") if key]
    if not keywordList:
        writeLog('关键字为空！')
        return jsonify({"code": 2000, "text": '关键字为空！'})
    writeLog(f'匹配的关键字为： {keywords}')

    # Match all keywords in one pass over the raw text so that a later keyword
    # can never match inside markup inserted for an earlier one. Longer
    # keywords are listed first so they win over their own prefixes.
    pattern = re.compile(
        "|".join(re.escape(key) for key in sorted(set(keywordList), key=len, reverse=True)))
    pieces = []
    cursor = 0
    for match in pattern.finditer(txt):
        # Escape both the plain and the highlighted parts so the recognized
        # text is never interpreted as HTML.
        pieces.append(escape(txt[cursor:match.start()]))
        pieces.append(
            '<span style="color: red; font-weight: bold; font-size: 1.3rem;">'
            f'{escape(match.group(0))}</span>')
        cursor = match.end()
    pieces.append(escape(txt[cursor:]))
    highlighted = "".join(pieces)

    html_path = f"{txt_path}/{file_name}.html"
    deleteDirOrFile(html_path)  # overwrite: remove the old file before writing
    with open(html_path, "w", encoding="utf-8") as f:
        f.write(f'<div style="color: #fff;">{highlighted}</div>')
    return jsonify({"code": 1000, "text": "关键字匹配完成!", "relativeHtmlPath": f"../files/text/{file_name}.html"})


# Open the home page in the browser
def startWeb():
    """Open ``html/index.html`` under ``base_path`` in a new browser tab,
    writing a log line before and after."""
    writeLog('开始启动浏览器打开首页。。。。。。')
    home_page = f'{base_path}/html/index.html'
    webbrowser.open_new_tab(home_page)
    writeLog('浏览器启动首页完成！')


# Stop the service
def killService():
    """Run ``taskkill /F /IM runme.exe`` through the shell, writing a log line
    before and after. The command's exit status is not inspected."""
    writeLog('关闭已打开的进程。。。。。。')
    kill_command = '"taskkill /F /IM runme.exe"'
    os.system(kill_command)
    writeLog('关闭重名服务成功！')
    # name = 'runme.exe'  # process name
    # for proc in psutil.process_iter():
    #     if proc.name() == name:
    #         proc.kill()


# Write to the log
def writeLog(txt):
    """Append ``txt`` as one line to today's log file.

    The file is ``{logPath}/{YYYYMMDD}.log``; the log directory is created on
    first use.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(logPath, exist_ok=True)
    formatted_date = datetime.datetime.now().strftime("%Y%m%d")
    # Explicit UTF-8: the log messages are Chinese, and the platform's default
    # encoding is not guaranteed to be able to represent them.
    with open(f"{logPath}/{formatted_date}.log", 'a', encoding='utf-8') as file:
        file.write(f'{txt}\n')


if __name__ == '__main__':
    # killService()
    # app.run(port=8000)
    # NOTE(review): '0.0.0.0' listens on every interface while the page calls
    # http://127.0.0.1:8000 — confirm whether remote access is intended.
    listen_address = ('0.0.0.0', 8000)
    server = pywsgi.WSGIServer(listen_address, app)
    # Open the page first; serve_forever() is the last statement of the script.
    startWeb()
    server.serve_forever()

前端页面代码：

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
    <script src="../js/jquery.min.js"></script>
    <style>
        *{
            margin: 0;
            padding: 0;
            font-family: PingFangSC-Light, 微软雅黑, serif;
        }

        body,html{
            width: 100%;
            height: auto;
            color: #fff;
            background: url('../img/background.png') no-repeat;
            background-size: 100% 100%;
        }

        .header {
            width: 100%;
            height: 80px;
            padding:0;
            min-width: 1000px;
        }

        .bg_header{
            width: 100%;
            height: 80px;
            background: url(../img/title.png) no-repeat;
            background-size: 100% 100%;
        }
        .t_title {
            width: 100%;
            height: 100%;
            text-align: center;
            font-size: 2.5em;
            line-height: 80px;
            color: #fff;
        }
        .content {
            width: 100%;
            height: calc(100vh - 122px);
            display: flex;
        }

        .split-line {
            border-bottom: 2px solid #1E3661;
        }

        .btn-group {
            position: absolute;
            top: 70px;
            width: 100%;
        }

        .btn-group>ul {
            list-style: none;
        }

        .btn-group>ul>li {
            width: 90px;
            height: 35px;
            line-height: 35px;
            border: 1px solid blue;
            background-image: url("../img/bnt.png");
            background-repeat: no-repeat;
            background-color: #132748;
            text-align: center;
            margin-left: 20px;
            cursor: pointer;
        }

        .btn-group>ul>li>a {

        }

        .fl>li {
            float: left;
        }

        .fr>li {
            float: right;
        }

        /*************************** 内容左侧区域 ***************************/
        .content-left {
            width: 35%;
            height: auto;
            box-shadow: 20px 10px 100px 50px rgba(44,90,169, 0.2) inset;
            display: flex;
            flex-direction: column;
            flex-wrap: nowrap;
            justify-content: center;
            align-items: center;
            padding: 20px 10px;
        }

        .setting-content {
            width: 100%;
            height: 100%;
            display: flex;
            flex-direction: column;
            justify-content: start;
            align-items: flex-start;
            padding: 10px 10px;
        }

        .label_ul {
            list-style-type: none;
            margin-top: 10px;
        }

        .label_ul>li {
            position: relative;
            display: inline-block;
            width: auto;
            height: 25px;
            line-height: 25px;
            text-align: center;
            background-color: #1a95ff;
            border: 1px solid darkseagreen;
            border-radius: 2px;
            padding: 2px 10px;
            margin: 0 10px 10px 0;
        }

        .label_ul>li>label {
            position: relative;
            top: -17px;
            right: -14px;
            width: 15px;
            height: 15px;
            line-height: 15px;
            border-radius: 50%;
            cursor: pointer;
            color: red;
            /*background-color: antiquewhite;*/
        }

        .label_ul>li:last-child {
            width: 25px;
            height: 25px;
            line-height: 25px;
            text-align: center;
            color: #1a95ff;
            background-color: #0D2138;
            border: 1px solid silver;
            padding: 2px;
            margin: 0;
        }

        .label_ul>li:last-child>button {
            width: 25px;
            height: 25px;
            line-height: 25px;
            border: none;
            padding: 0;
            margin: 0;
            background-color: #0D2138;
            color: #1a95ff;
        }


        /*************************** 内容中间区域 ***************************/
        .content-center {
            width: 30%;
            height: auto;
            box-shadow: 20px -10px 100px 50px rgba(44,90,169, 0.2);
        }
        .content-btn {
            display: flex;
            flex-direction: row;
            flex-wrap: nowrap;
            justify-content: center;
            padding-top: 60px;
        }
        .circle-button {
            width: 300px;
            height: 100px;
            border-radius: 10%;
            background-color: #4CAF50;
            color: white;
            font-size: 16px;
            border: none;
            cursor: pointer;
            /*box-shadow: 10px -10px 80px 100px rgba(44,90,169, 0.2);*/
            box-shadow: 5px 10px #999;
        }

        .content-process {
            margin-top: 50px;
            min-height: 200px;
            background-color: #000c3b;
            height: calc(100% - 230px);
            padding: 10px 10px;
        }

        .circle-button:hover {
            background-color: #3e8e41;
        }

        .circle-button:active {
            background-color: #3e8e41;
            box-shadow: 0 5px #666;
            transform: translateY(4px);
        }


        /*************************** 内容右侧区域 ***************************/
        .content-right {
            width: 35%;
            height: auto;
            display: flex;
            flex-direction: column;
            flex-wrap: nowrap;
            justify-content: center;
            align-items: center;
            box-shadow: 20px 10px 100px 50px rgba(44,90,169, 0.2) inset;
            padding: 20px 10px;
        }

        .result-title {
            margin-bottom: 5px;
        }

        .result-content {
            color: white;
            width: 100%;
            height: 100%;
        }

        .btn-choose {
            height: 200px;
            width: 200px;
            border: 1px solid grey;
            margin-top: 10px;
        }

        /*---------------------- 弹窗 --------------------*/
        .dialog-div {
            position: fixed;
            top: 50%;
            left: 50%;
            transform: translate(-50%, -50%);
            z-index: 9999;
            background-color: #264D91;
            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.3);
        }

        .dialog-title {
            height: 40px;
            /*line-height: 40px;*/
            text-align: center;
        }

        .dialog-body {
            min-height: 60px;
            line-height: 60px;
            margin: 0 20px 5px 20px;
        }

        .dialog-foot {
            height: 40px;
            line-height: 40px;
            text-align: center;
        }

        .dialog-btn-sure {
            width: 60px;
            height: 25px;
            line-height: 25px;
        }

        .dialog-btn-cancel {
            width: 60px;
            height: 25px;
            line-height: 25px;
        }

        .close-btn {
            width: 20px;
            height: 20px;
            line-height: 20px;
            position: relative;
            text-align: center;
            top: 0;
            right: -240px;
            cursor: pointer;
        }

        .close-btn:hover {
            background-color: #5dadf3;
        }

    </style>
</head>
<body>

<div class="header">
    <div class="bg_header">
        <div class="header_nav fl t_title">
            智能视频语音识别平台
        </div>
    </div>
</div>

<div class="btn-group">
    <ul class="fl">
        <li onclick="alert(1)"><a>11111</a></li>
        <li><a>22222</a></li>
    </ul>
</div>

<div class="btn-group">
    <ul class="fr" style="margin-right: 20px;">
        <li onclick="alert(1)"><a>清理文件</a></li>
        <li><a>22222</a></li>
    </ul>
</div>

<div class="split-line" style="height: 40px"></div>

<div class="content">

    <div class="content-left">
        <div class="setting-content">
            <div style="padding: 10px 10px;">
                <h3>关键词</h3>
                <ul class="label_ul">
                    <li><span>转账</span><label>x</label></li>
                    <li><span>收钱</span><label>x</label></li>
                    <li><span>提取</span><label>x</label></li>
                    <li><span>取钱</span><label>x</label></li>
                    <li><span>解绑</span><label>x</label></li>
                    <li><span>换绑</span><label>x</label></li>
                    <li><span>绑定</span><label>x</label></li>
                    <li><span>短信</span><label>x</label></li>
                    <li><span>验证</span><label>x</label></li>
                    <li><button id="addLabel" onclick="addLabel()">+</button></li>
                </ul>

            </div>
            <div style="padding: 10px 10px;">
                <h3>选择视频</h3>
                <div style="text-align: center; width: 100%;">
                    <img id="fileChoseBtn" onclick="chooseMoviePath()" src="../img/add.png" alt="选择文件" class="btn-choose">
                    <div style="margin: 20px; text-align: left;">
                        <label >文件地址:</label><br><br>
                        <div id="videoPath" style="height: 25px; width: 30vw; border: 1px solid gainsboro;"></div>
                    </div>
                </div>
            </div>
        </div>
    </div>

    <div class="content-center">
        <div class="content-btn">
            <button onclick="start()" class="circle-button">开始解析</button>
        </div>
        <div class="content-process">
            <!-- 解析进度 -->
            <label id="lb1"></label><br>
            <label id="lb2"></label><br>
            <label id="lb3"></label><br>
            <label id="lb4"></label><br>
            <label id="lb5"></label><br>
        </div>
    </div>

    <div class="content-right">
        <h2 class="result-title">解析结果</h2>
        <div class="result-content">
            <iframe id="result" src="" style="width: 100%; height: 400px; border: 1px grey outset"></iframe>
        </div>
    </div>
</div>


<!-- 弹窗组件 -->
<div class="dialog-div" style="display: none;">
    <div class="close-btn">x</div>
    <div>
        <div class="dialog-title">
            标签选择
        </div>
        <div class="dialog-body">
            <form>
                <label for="label">输入标签</label>
                <input id="label" name="label" type="text" maxlength="8">
            </form>
        </div>
        <div class="dialog-foot">
            <button id="sure-btn" class="dialog-btn-sure" onclick="sure()">确认</button>
            <button id="cancel-btn" class="dialog-btn-cancel" onclick="cancel()">取消</button>
        </div>
    </div>
</div>

</body>
<script type="text/javascript">

    // Dialog confirm: add the typed label to the keyword list
    function sure() {

        var label = String($('#label').val() || '').trim();

        // Ignore blank input so no empty keyword is added
        if (label) {
            // Build the nodes with .text() so the typed label is never parsed as HTML
            var removeBtn = $('<label></label>').text('x');
            // Bind the remove handler to the new label only; re-binding every
            // existing label would stack duplicate handlers on them
            removeBtn.click(function () {
                $(this).parent().remove();
            });

            var insertDom = $('<li></li>')
                .append($('<span></span>').text(label))
                .append(removeBtn);
            var targetDom = $('#addLabel').parent();
            insertDom.insertBefore(targetDom);
        }

        $('.dialog-div').toggle();
    }

    // Dialog cancel: toggle the dialog's visibility
    function cancel() {
        var dialog = $('.dialog-div');
        dialog.toggle();
    }


    // Add a label: toggle the label dialog's visibility
    function addLabel() {
        var dialog = $('.dialog-div');
        dialog.toggle();
    }

    // Remove a keyword when its "x" is clicked (bound to the labels that exist
    // when this script runs)
    var delLabelDom = $('.label_ul label');
    delLabelDom.on('click', function () {
        var chip = $(this).parent();
        chip.remove();
    });

    // Choose the video
    function chooseMoviePath() {
        $.ajax({
            url: 'http://127.0.0.1:8000/api/choose/moviePath',
            type: 'POST',
            dataType: 'json',
            success: function(res) {
                if(res) {
                    // A file path is plain text: .text() keeps characters such
                    // as "&" or "<" intact, and start() reads it back with .text()
                    $('#videoPath').text(res);
                }
            }
        });
    }

    // Video to audio
    function videoToAudio() {
        // The request is synchronous (async: false), so the callbacks run
        // before $.ajax returns and the status code can be handed back to the
        // caller. A `return` inside a callback only returns from the callback.
        var code;
        $.ajax({
            url: 'http://127.0.0.1:8000/api/trans/videoToAudio',
            type: 'POST',
            dataType: 'json',
            async: false,
            success: function(res) {
                $('#lb1').text(res.text);
                code = res.code;
            },
            error: function() {
                // Request failed: report it with the back end's failure code
                $('#lb1').text('请求失败');
                code = 2000;
            }
        });
        return code;
    }

    // Audio noise reduction
    function noiseReduce() {
        // The request is synchronous (async: false), so the callbacks run
        // before $.ajax returns and the status code can be handed back to the
        // caller. A `return` inside a callback only returns from the callback.
        var code;
        $.ajax({
            url: 'http://127.0.0.1:8000/api/trans/noiseReduce',
            type: 'POST',
            dataType: 'json',
            async: false,
            success: function(res) {
                $('#lb2').text(res.text);
                code = res.code;
            },
            error: function() {
                // Request failed: report it with the back end's failure code
                $('#lb2').text('请求失败');
                code = 2000;
            }
        });
        return code;
    }

    // Split the audio into segments
    function audioSplit() {
        // The request is synchronous (async: false), so the callbacks run
        // before $.ajax returns and the status code can be handed back to the
        // caller. A `return` inside a callback only returns from the callback.
        var code;
        $.ajax({
            url: 'http://127.0.0.1:8000/api/trans/audioSplit',
            type: 'POST',
            dataType: 'json',
            async: false,
            success: function(res) {
                $('#lb3').text(res.text);
                code = res.code;
            },
            error: function() {
                // Request failed: report it with the back end's failure code
                $('#lb3').text('请求失败');
                code = 2000;
            }
        });
        return code;
    }

    // Audio to text
    function audioToText() {
        // The request is synchronous (async: false), so the callbacks run
        // before $.ajax returns and the status code can be handed back to the
        // caller. A `return` inside a callback only returns from the callback.
        var code;
        $.ajax({
            url: 'http://127.0.0.1:8000/api/trans/audioToText',
            type: 'POST',
            dataType: 'json',
            async: false,
            success: function(res) {
                $('#lb4').text(res.text);
                code = res.code;
            },
            error: function() {
                // Request failed: report it with the back end's failure code
                $('#lb4').text('请求失败');
                code = 2000;
            }
        });
        return code;
    }

    // Match keywords
    function matchKeywords(keywords) {
        // The request is synchronous (async: false), so the callbacks run
        // before $.ajax returns and the status code can be handed back to the
        // caller. A `return` inside a callback only returns from the callback.
        var code;
        $.ajax({
            // Encode the value so characters such as "&", "#" or "+" inside a
            // keyword cannot break the query string
            url: 'http://127.0.0.1:8000/api/trans/matchKeywords?keywords=' + encodeURIComponent(keywords),
            type: 'POST',
            dataType: 'json',
            async: false,
            success: function(res) {
                code = res.code;
                var relativeHtmlPath = res.relativeHtmlPath;
                if(code === 1000) {
                    // Show the generated result page in the iframe
                    var iframe = $('#result');
                    iframe.attr("src", relativeHtmlPath);
                }

                $('#lb5').text(res.text);
            },
            error: function() {
                // Request failed: report it with the back end's failure code
                $('#lb5').text('请求失败');
                code = 2000;
            }
        });
        return code;
    }

    // Run the whole pipeline: extract audio, denoise, split, recognize, match keywords
    function start() {

        // Collect the keywords currently shown in the list
        var labels = [];
        var labelDoms = $('.label_ul li span');
        labelDoms.each(function (index, item) {
            labels.push($(item).text());
        });

        var keywords = labels.join('|');

        // Make sure a video has been chosen
        var videoPath = $('#videoPath').text();
        if(!videoPath) {
            alert('请选择要解析的视频!');
            return;
        }

        // Clear the progress labels
        $('.content-process').find('label').text('');

        var steps = [
            { label: '#lb1', message: '开始从视频提取音频..........', run: videoToAudio },
            { label: '#lb2', message: '开始音频降噪..........', run: noiseReduce },
            { label: '#lb3', message: '开始音频切片..........', run: audioSplit },
            { label: '#lb4', message: '开始语音提取文字..........', run: audioToText },
            { label: '#lb5', message: '开始匹配关键字..........', run: function () { return matchKeywords(keywords); } }
        ];

        // Chain the steps on the promise returned by sleep(). Calling sleep()
        // without waiting for it has no effect, and because every request is
        // synchronous the page could never repaint the progress text. Waiting
        // on the timer yields to the browser between steps.
        var pipeline = Promise.resolve(true);
        steps.forEach(function (step) {
            pipeline = pipeline.then(function (proceed) {
                if (!proceed) {
                    return false;
                }
                $(step.label).text(step.message);
                return sleep(1000).then(function () {
                    // 2000 is the failure code returned by the back end; stop
                    // the pipeline there. Any other result, including a step
                    // that returns nothing, lets the next step run.
                    return step.run() !== 2000;
                });
            });
        });
    }

    // Return a promise that resolves after `ms` milliseconds
    function sleep(ms) {
        return new Promise(function (resolve) {
            setTimeout(resolve, ms);
        });
    }


</script>
</html>

页面比较简单

需要下载的离线内容：