[转]C#写一个后台运行的文字识别工具

发布时间 2023-04-03 09:20:29作者: CastleWu

最近做笔记需要一个截图后获取图中文字的轻型程序,最好直接按快捷键就能识别并将文字自动复制。网上的应该都是要钱的,或者东西太杂了看着乱得慌,于是决定自己写。我c#只稍微学了一点,讲得不好的地方、代码不规范的地方请见谅。

我使用的是百度的文字识别api,请先自己去申请一下资源,免费的,好像是每个月可以免费识别1000次。然后截图直接用的Snipaste这个软件,贼好用。

api主页:https://ai.baidu.com/tech/ocr/general

然后领取免费资源,创建应用使用。

一、创建项目

创建一个窗口应用项目

然后右键引用 - 管理NuGet程序包,安装Newtonsoft.Json和System.Net.Http

二、编写基本识别代码

首先去百度api的控制台获取一下api key和secret key,网址:百度智能云-登录

然后进入文档,里面请求的基本代码已经给我们写好了,直接复制就ok了,文字识别我们用高精度版的。

获取AccessToken文档:https://ai.baidu.com/ai-doc/REFERENCE/Ck3dwjhhu

文字识别文档:https://ai.baidu.com/ai-doc/OCR/1k3h7y3db

MainForm.cs

  1. using System;
  2. using System.Collections.Generic;
  3. using System.Net.Http;
  4. using System.Text;
  5. using System.Windows.Forms;
  6. using Newtonsoft.Json;
  7. using System.Text.RegularExpressions;
  8. using System.IO;
  9. using System.Net;
  10. using System.Web;
  11. namespace TextRecognition
  12. {
  /// <summary>
  /// Main form of the tool. It requests a Baidu OCR access token when the form
  /// loads and converts a bitmap found on the clipboard into text, which is then
  /// written back to the clipboard.
  /// </summary>
  public partial class MainForm : Form
  {
      public MainForm()
      {
          InitializeComponent();
      }

      // Token required by the OCR endpoint; filled in by getAccessToken().
      string ACCESS_TOKEN = "";
      // API key of the application (look it up in the Baidu console).
      string API_KEY = "控制台查询";
      // Secret key of the application (look it up in the Baidu console).
      string API_SECRET = "控制台查询";
      // Last recognition result, or a description of the last failure.
      string resultText = "";

      private void MainForm_Load(object sender, EventArgs e)
      {
          getAccessToken();
      }

      /// <summary>
      /// Requests an access token from the OAuth endpoint and stores it in
      /// <c>ACCESS_TOKEN</c>. On failure the token is left empty and the reason
      /// is stored in <c>resultText</c> instead of crashing the form.
      /// </summary>
      private void getAccessToken()
      {
          String authHost = "https://aip.baidubce.com/oauth/2.0/token";
          var paraList = new List<KeyValuePair<string, string>>
          {
              new KeyValuePair<string, string>("grant_type", "client_credentials"),
              new KeyValuePair<string, string>("client_id", API_KEY),
              new KeyValuePair<string, string>("client_secret", API_SECRET)
          };

          try
          {
              using (var client = new HttpClient())
              using (var content = new FormUrlEncodedContent(paraList))
              // GetAwaiter().GetResult() rethrows the original exception
              // instead of wrapping it in an AggregateException like .Result.
              using (HttpResponseMessage response = client.PostAsync(authHost, content).GetAwaiter().GetResult())
              {
                  string result = response.Content.ReadAsStringAsync().GetAwaiter().GetResult();

                  // Parse as a generic JSON object so that non-string members
                  // in the response cannot break deserialization.
                  var json = Newtonsoft.Json.Linq.JObject.Parse(result);
                  ACCESS_TOKEN = (string)json["access_token"] ?? "";
                  if (ACCESS_TOKEN.Length == 0)
                  {
                      // No token in the response: keep the raw body for diagnosis.
                      resultText = "异常:" + result;
                  }
              }
          }
          catch (HttpRequestException ex)
          {
              ACCESS_TOKEN = "";
              resultText = "异常:" + ex.ToString();
          }
          catch (System.Threading.Tasks.TaskCanceledException ex)
          {
              // Raised when the request times out.
              ACCESS_TOKEN = "";
              resultText = "异常:" + ex.ToString();
          }
          catch (JsonException ex)
          {
              ACCESS_TOKEN = "";
              resultText = "异常:" + ex.ToString();
          }
      }

      /// <summary>
      /// Entry point used when the user triggers recognition: if the clipboard
      /// holds a bitmap, it is sent to the OCR service and the recognized text
      /// replaces the clipboard content. Any failure is recorded in
      /// <c>resultText</c>.
      /// </summary>
      private void doRecognize()
      {
          try
          {
              IDataObject iData = Clipboard.GetDataObject();
              // Only handle the case where the copied content is an image.
              if (iData == null || !iData.GetDataPresent(DataFormats.Bitmap))
              {
                  return;
              }

              string base64;
              using (var image = Clipboard.GetImage())
              {
                  if (image == null)
                  {
                      return;
                  }

                  using (var ms = new MemoryStream())
                  {
                      image.Save(ms, System.Drawing.Imaging.ImageFormat.Png);
                      base64 = Convert.ToBase64String(ms.ToArray());
                  }
              }

              // Retry the token request if it failed earlier or was reset.
              if (string.IsNullOrEmpty(ACCESS_TOKEN))
              {
                  getAccessToken();
                  if (string.IsNullOrEmpty(ACCESS_TOKEN))
                  {
                      // resultText already describes why no token is available.
                      return;
                  }
              }

              recognize(base64);

              // Clipboard.SetText throws for an empty string, so only copy
              // when something was actually recognized.
              if (!string.IsNullOrEmpty(resultText))
              {
                  Clipboard.SetText(resultText);
              }
          }
          catch (Exception ex)
          {
              resultText = "异常:" + ex.ToString();
          }
      }

      /// <summary>
      /// Calls the OCR endpoint with the given image and stores the recognized
      /// lines, one per line, in <c>resultText</c>.
      /// </summary>
      /// <param name="base64">Base64-encoded image data.</param>
      /// <exception cref="InvalidOperationException">
      /// The response does not contain a <c>words_result</c> array.
      /// </exception>
      private void recognize(string base64)
      {
          string host = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic?access_token=" + ACCESS_TOKEN;
          HttpWebRequest request = (HttpWebRequest)WebRequest.Create(host);
          request.Method = "POST";
          request.ContentType = "application/x-www-form-urlencoded";
          request.KeepAlive = true;

          // Form body: the URL-encoded base64 image (pure ASCII after encoding).
          byte[] buffer = Encoding.UTF8.GetBytes("image=" + HttpUtility.UrlEncode(base64));
          request.ContentLength = buffer.Length;
          using (Stream body = request.GetRequestStream())
          {
              body.Write(buffer, 0, buffer.Length);
          }

          string result;
          using (var response = (HttpWebResponse)request.GetResponse())
          using (var reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
          {
              result = reader.ReadToEnd();
          }

          // Use a real JSON parser: a regex cannot cope with escaped quotes
          // inside the recognized text and leaves JSON escapes undecoded.
          var json = Newtonsoft.Json.Linq.JObject.Parse(result);
          var words = json["words_result"] as Newtonsoft.Json.Linq.JArray;
          if (words == null)
          {
              // Most likely an error response; drop the token so that the next
              // attempt requests a fresh one, and surface the raw response.
              ACCESS_TOKEN = "";
              throw new InvalidOperationException("OCR request failed: " + result);
          }

          StringBuilder sb = new StringBuilder();
          foreach (var item in words)
          {
              sb.AppendLine((string)item["words"]);
          }
          resultText = sb.ToString();
          Console.WriteLine(resultText);
      }
  }
  99. }

此时可以在窗口里添加个按钮测试一下doRecognize识别功能,用截图工具截个图,再点按钮,可以看到他已经识别完成并且给我们把结果复制到剪切板了

三、监听快捷键按下

我想实现当按下alt+3的时候执行doRecognize函数,需要全局hook键盘。

首先在项目里创建类文件Hook.cs,然后编辑内容

Hook.cs

  1. using System;
  2. using System.Collections.Generic;
  3. using System.Diagnostics;
  4. using System.Linq;
  5. using System.Runtime.InteropServices;
  6. using System.Text;
  7. using System.Threading.Tasks;
  8. using System.Windows.Forms;
  9. namespace TextRecognition
  10. {
  /// <summary>
  /// Installs a global low-level keyboard hook and detects the Alt+3 key
  /// combination.
  /// </summary>
  class Hook
  {
      // NOTE(review): the native API declares handles, wParam and the return
      // values as pointer-sized; the int-based signatures below are kept so
      // existing callers still compile. Consider IntPtr if 64-bit issues show up.
      [DllImport("user32.dll")]
      public static extern int SetWindowsHookEx(int idHook, HookProc lpfn, IntPtr hInstance, int threadId);
      [DllImport("user32.dll", CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
      public static extern bool UnhookWindowsHookEx(int idHook);
      [DllImport("user32.dll")]
      public static extern int CallNextHookEx(int idHook, int nCode, int wParam, IntPtr lParam);
      [DllImport("kernel32.dll")]
      public static extern int GetCurrentThreadId();
      [DllImport("kernel32.dll")]
      public static extern IntPtr GetModuleHandle(string name);
      [DllImport("User32.dll")]
      public static extern void keybd_event(Byte bVk, Byte bScan, Int32 dwFlags, Int32 dwExtraInfo);

      // Hook type: low-level keyboard hook.
      public const int WH_KEYBOARD_LL = 13;
      // Message identifiers delivered in wParam for key presses.
      private const int WM_KEYDOWN = 0x0100;
      private const int WM_SYSKEYDOWN = 0x0104;

      public delegate int HookProc(int nCode, int wParam, IntPtr lParam);

      // The callback is stored in a field so the delegate stays referenced
      // for as long as the native hook may call it.
      HookProc KeyBoardHookProcedure;
      public int hHook;
      // True while the "3" key of the hotkey is held, to ignore auto-repeat.
      private bool hotkeyHeld;

      // Layout of the data passed to the hook procedure.
      [StructLayout(LayoutKind.Sequential)]
      public class KeyBoardHookStruct
      {
          public int vkCode;
          public int scanCode;
          public int flags;
          public int time;
          public int dwExtraInfo;
      }

      /// <summary>
      /// Installs the keyboard hook. Does nothing when a hook is already
      /// installed, so an earlier hook handle is never overwritten and leaked.
      /// </summary>
      public void Hook_Start()
      {
          if (hHook != 0)
          {
              return;
          }

          KeyBoardHookProcedure = new HookProc(KeyBoardHookProc);
          using (Process process = Process.GetCurrentProcess())
          using (ProcessModule module = process.MainModule)
          {
              hHook = SetWindowsHookEx(WH_KEYBOARD_LL, KeyBoardHookProcedure, GetModuleHandle(module.ModuleName), 0);
          }
      }

      /// <summary>
      /// Removes the keyboard hook if one is installed.
      /// </summary>
      public void Hook_Clear()
      {
          if (hHook != 0)
          {
              UnhookWindowsHookEx(hHook);
              hHook = 0;
          }
      }

      /// <summary>
      /// Hook procedure: swallows Alt+3 and reports it once per physical key
      /// press; every other message is passed to the next hook.
      /// </summary>
      private int KeyBoardHookProc(int nCode, int wParam, IntPtr lParam)
      {
          if (nCode >= 0)
          {
              KeyBoardHookStruct kbh = (KeyBoardHookStruct)Marshal.PtrToStructure(lParam, typeof(KeyBoardHookStruct));
              if (kbh.vkCode == (int)Keys.D3)
              {
                  bool isKeyDown = wParam == WM_KEYDOWN || wParam == WM_SYSKEYDOWN;
                  if (!isKeyDown)
                  {
                      // Key released: the next key-down is a new press.
                      hotkeyHeld = false;
                  }

                  // Alt + 3 (Alt must be the only modifier).
                  if ((int)Control.ModifierKeys == (int)Keys.Alt)
                  {
                      // The hook also sees key-up and auto-repeat messages;
                      // report only the first key-down of a press.
                      if (isKeyDown && !hotkeyHeld)
                      {
                          hotkeyHeld = true;
                          Console.WriteLine("按了alt+3");
                      }
                      // Do not pass the keyboard message on.
                      return 1;
                  }
              }
          }
          return CallNextHookEx(hHook, nCode, wParam, lParam);
      }
  }
  71. }

 再来看一下控制台输出,发现可以获取到按下的事件了,如果想换成别的组合键可以上网搜一下该怎么写。

四、按键触发doRecognize函数

我们按下alt+3被hook到之后,要调用MainForm.cs里定义的doRecognize函数。当时学的时候没学太多,只记得个委托什么什么的可以实现这样的功能,我就用这个委托实现了。当然也有别的方式,比如直接抽离成工具类什么的。

我讲不大清楚,大家既然都用c#了应该比我懂得多,我就直接放代码了。

Hook.cs

  1. using System;
  2. using System.Collections.Generic;
  3. using System.Diagnostics;
  4. using System.Linq;
  5. using System.Runtime.InteropServices;
  6. using System.Text;
  7. using System.Threading.Tasks;
  8. using System.Windows.Forms;
  // Namespace and class name match the rest of the project: the form code
  // refers to this type as TextRecognition.Hook / Hook.recHandler.
  namespace TextRecognition
  {
      class Hook
      {
          // Only the new or modified members are shown below.

          // Message identifiers delivered in wParam for key presses.
          private const int WM_KEYDOWN = 0x0100;
          private const int WM_SYSKEYDOWN = 0x0104;

          public delegate void recHandler(object sender, EventArgs e);
          // Raised once per physical Alt+3 press. Subscribers should return
          // quickly because they run inside the keyboard hook callback.
          public event recHandler handler;

          // True while the "3" key of the hotkey is held, to ignore auto-repeat.
          private bool hotkeyHeld;

          /// <summary>
          /// Hook procedure: when Alt+3 is pressed and a subscriber exists, the
          /// key is swallowed and <c>handler</c> is raised once per press;
          /// everything else is passed to the next hook.
          /// </summary>
          private int KeyBoardHookProc(int nCode, int wParam, IntPtr lParam)
          {
              if (nCode >= 0)
              {
                  KeyBoardHookStruct kbh = (KeyBoardHookStruct)Marshal.PtrToStructure(lParam, typeof(KeyBoardHookStruct));
                  if (kbh.vkCode == (int)Keys.D3)
                  {
                      bool isKeyDown = wParam == WM_KEYDOWN || wParam == WM_SYSKEYDOWN;
                      if (!isKeyDown)
                      {
                          // Key released: the next key-down is a new press.
                          hotkeyHeld = false;
                      }

                      // Copy the delegate so the null check and the call
                      // operate on the same subscriber list.
                      recHandler callback = this.handler;
                      if ((int)Control.ModifierKeys == (int)Keys.Alt && callback != null)
                      {
                          // The hook also sees key-up and auto-repeat messages;
                          // raise the event only on the first key-down.
                          if (isKeyDown && !hotkeyHeld)
                          {
                              hotkeyHeld = true;
                              Console.WriteLine("按了alt+3");
                              callback(this, EventArgs.Empty);
                          }
                          return 1;
                      }
                  }
              }
              return CallNextHookEx(hHook, nCode, wParam, lParam);
          }
      }
  }

 MainForm.cs

  1. using System;
  2. using System.Collections.Generic;
  3. using System.Net.Http;
  4. using System.Text;
  5. using System.Windows.Forms;
  6. using Newtonsoft.Json;
  7. using System.Text.RegularExpressions;
  8. using System.IO;
  9. using System.Net;
  10. using System.Web;
  11. namespace TextRecognition
  12. {
  public partial class MainForm : Form
  {
      // Only the new or modified members are shown.

      // Global keyboard hook that reports the recognition hotkey.
      private readonly Hook hook = new Hook();

      private void MainForm_Load(object sender, EventArgs e)
      {
          getAccessToken();
          // Subscribe first so no key press is lost once the hook is active.
          hook.handler += new Hook.recHandler(handleRec);
          hook.Hook_Start();
      }

      /// <summary>
      /// Called by the keyboard hook when the hotkey is pressed.
      /// </summary>
      public void handleRec(object sender, EventArgs e)
      {
          // This runs inside the low-level hook callback, which must return
          // quickly or Windows may drop the hook. Queue the slow OCR work on
          // the form's message loop instead of doing it here.
          if (IsHandleCreated)
          {
              BeginInvoke(new MethodInvoker(doRecognize));
          }
          else
          {
              doRecognize();
          }
      }

      /// <summary>
      /// Removes the keyboard hook when the form is closed.
      /// </summary>
      protected override void OnFormClosed(FormClosedEventArgs e)
      {
          hook.Hook_Clear();
          base.OnFormClosed(e);
      }
  }
  27. }

五、优化

我想让他一直在后台,别给我一启动蹦出一个窗口,我还要避免这个程序重复运行。

1.设置后台运行

首先我们把NotifyIcon这个组件拖入到窗口里,再拖个ContextMenuStrip来配置右键展示的菜单

设置一下NotifyIcon的icon,随便找个xxx.ico文件吧,然后修改Text属性,再把ContextMenuStrip属性修改为刚添加的那个context menu strip,然后这个就能在右下角看到了(这个b水印真烦啊)

 再编辑一下ContextMenuStrip,我创建了复制上一次识别结果和退出两个选项,双击他们给他们添加点击事件

  /// <summary>
  /// Menu handler: copies the last recognition result to the clipboard.
  /// </summary>
  private void copyMenuItem_Click(object sender, EventArgs e)
  {
      // Clipboard.SetText throws ArgumentNullException for a null or empty
      // string, which is the state before anything has been recognized.
      if (string.IsNullOrEmpty(resultText))
      {
          return;
      }
      Clipboard.SetText(resultText);
  }
  /// <summary>
  /// Menu handler for the "exit" item: closes this form.
  /// </summary>
  private void exitMenuItem_Click(object sender, EventArgs e)
  {
      Close();
  }

再让窗体隐藏,将主窗体的ShowInTaskbar属性设为 False,将其 WindowState属性设为 Minimized,程序即可变成后台运行程序。

2.防止重复开启

很简单,在Program.cs里写代码

  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Threading.Tasks;
  5. using System.Windows.Forms;
  6. namespace TextRecognition
  7. {
  static class Program
  {
      /// <summary>
      /// The main entry point for the application. Uses a named mutex so that
      /// only one instance of the program runs at a time.
      /// </summary>
      [STAThread]
      static void Main()
      {
          // Configure visual styles before any UI (including the message
          // box below) is created.
          Application.EnableVisualStyles();
          Application.SetCompatibleTextRenderingDefault(false);

          bool unique;
          Console.WriteLine(Application.ProductName);
          // The using block keeps the mutex referenced for the whole run.
          // As a plain local that is never used again, it could be collected
          // while the application is still running, releasing the guard.
          using (System.Threading.Mutex mutex = new System.Threading.Mutex(true, Application.ProductName, out unique))
          {
              if (!unique)
              {
                  MessageBox.Show("请勿重复运行此程序!", "错误", MessageBoxButtons.OK, MessageBoxIcon.Asterisk);
                  return;
              }

              try
              {
                  Application.Run(new MainForm());
              }
              finally
              {
                  // This thread owns the mutex (it was created here with
                  // initial ownership), so release it before disposing.
                  mutex.ReleaseMutex();
              }
          }
      }
  }
  29. }

ok了,运行一下玩玩吧

转自https://blog.csdn.net/m0_52640724/article/details/126994378