在日常工作和生活中,我们常常需要处理大量的图片文件,这些图片可能包含重要的文字信息。手动识别这些文字并进行相应的处理(如重命名图片文件)既耗时又容易出错。为了解决这一问题,本项目旨在开发一个基于WPF(Windows Presentation Foundation)的桌面应用程序,结合腾讯OCR(光学字符识别)技术,实现批量识别图片中的文字并根据识别结果对图片进行重命名或区域内容识别后处理。
通过本项目,用户可以:
WPF提供了丰富的UI组件和灵活的布局方式,适合构建功能强大且用户友好的桌面应用。以下是该应用的主要界面设计元素:
ImageOCRRenamer/
├── ImageOCRRenamer/
│ ├── MainWindow.xaml
│ ├── MainWindow.xaml.cs
│ ├── OcrService.cs
│ ├── ImageProcessor.cs
│ ├── Models/
│ │ ├── ImageItem.cs
│ │ └── OcrResult.cs
│ ├── Services/
│ │ └── TencentOcrClient.cs
│ ├── Views/
│ │ └── ImageListView.xaml
│ └── App.xaml
├── Resources/
│ └── styles.xaml
└── packages.config
Install-Package TencentCloudSDK
Install-Package MahApps.Metro
首先,需要在腾讯云控制台开通OCR服务,并获取API密钥(SecretId和SecretKey)。
<!--
    Main window layout: menu, toolbar, image list, progress bar and log box,
    stacked top to bottom in five grid rows. The window is its own DataContext,
    so every binding below targets a property of the MainWindow code-behind.
-->
<Window x:Class="ImageOCRRenamer.MainWindow"
        xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
        xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
        xmlns:local="clr-namespace:ImageOCRRenamer"
        Title="图片OCR批量重命名工具" Height="600" Width="800">
    <Window.Resources>
        <!-- Must be declared here: the progress bar looks it up as a StaticResource. -->
        <BooleanToVisibilityConverter x:Key="BooleanToVisibilityConverter"/>
    </Window.Resources>
    <Grid>
        <!-- Without explicit rows every child would be stacked on top of each other in row 0. -->
        <Grid.RowDefinitions>
            <RowDefinition Height="Auto"/>
            <RowDefinition Height="Auto"/>
            <RowDefinition Height="*"/>
            <RowDefinition Height="Auto"/>
            <RowDefinition Height="Auto"/>
        </Grid.RowDefinitions>
        <!-- Menu bar -->
        <Menu Grid.Row="0">
            <MenuItem Header="文件">
                <MenuItem Header="选择图片文件夹" Click="SelectFolder_Click"/>
                <MenuItem Header="退出" Click="Exit_Click"/>
            </MenuItem>
            <MenuItem Header="帮助">
                <MenuItem Header="关于"/>
                <MenuItem Header="帮助文档"/>
            </MenuItem>
        </Menu>
        <!-- Toolbar -->
        <ToolBarTray Grid.Row="1">
            <ToolBar>
                <Button Content="选择图片文件夹" Click="SelectFolder_Click"/>
                <!-- Named so the code-behind can refer to it as StartRecognitionButton. -->
                <Button x:Name="StartRecognitionButton" Content="开始识别" Click="StartRecognition_Click" IsEnabled="{Binding IsImagesSelected}"/>
            </ToolBar>
        </ToolBarTray>
        <!-- Image list -->
        <ListView x:Name="ImageListView" Grid.Row="2" ItemsSource="{Binding ImageItems}" SelectionMode="Extended">
            <ListView.View>
                <GridView>
                    <GridViewColumn Header="缩略图" Width="100">
                        <GridViewColumn.CellTemplate>
                            <DataTemplate>
                                <Image Source="{Binding Thumbnail}" Width="80" Height="80"/>
                            </DataTemplate>
                        </GridViewColumn.CellTemplate>
                    </GridViewColumn>
                    <GridViewColumn Header="文件名" DisplayMemberBinding="{Binding FileName}" Width="200"/>
                    <GridViewColumn Header="识别状态" DisplayMemberBinding="{Binding Status}" Width="150"/>
                    <GridViewColumn Header="OCR结果" Width="300">
                        <GridViewColumn.CellTemplate>
                            <DataTemplate>
                                <TextBlock Text="{Binding OcrResult}" TextWrapping="Wrap"/>
                            </DataTemplate>
                        </GridViewColumn.CellTemplate>
                    </GridViewColumn>
                </GridView>
            </ListView.View>
        </ListView>
        <!-- Progress bar, only visible while a recognition run is in progress -->
        <ProgressBar x:Name="ProgressBar" Grid.Row="3" Height="25" Value="{Binding Progress}" Minimum="0" Maximum="100" Visibility="{Binding IsProcessing, Converter={StaticResource BooleanToVisibilityConverter}}"/>
        <!-- Log output -->
        <TextBox x:Name="LogTextBox" Grid.Row="4" IsReadOnly="True" ScrollViewer.VerticalScrollBarVisibility="Auto" Height="100" VerticalAlignment="Bottom"/>
    </Grid>
</Window>
using System;
using System.Collections.ObjectModel;
using System.ComponentModel;
using System.IO;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Windows;
using System.Windows.Controls;
namespace ImageOCRRenamer
{
/// <summary>
/// Main window of the batch OCR tool. The window is its own data context:
/// the XAML binds to <see cref="ImageItems"/>, <see cref="IsImagesSelected"/>,
/// <see cref="IsProcessing"/> and <see cref="Progress"/>.
/// </summary>
public partial class MainWindow : Window, INotifyPropertyChanged
{
    // Thumbnails are shown at 80x80; decoding at twice that keeps them sharp
    // without holding every full-resolution bitmap in memory.
    private const int ThumbnailDecodeWidth = 160;

    private static readonly string[] SupportedExtensions = { ".png", ".jpg", ".jpeg", ".bmp" };

    private ObservableCollection<ImageItem> _imageItems;
    private bool _isProcessing;
    private int _progress;

    /// <summary>Images currently loaded into the list view.</summary>
    public ObservableCollection<ImageItem> ImageItems
    {
        get => _imageItems;
        set
        {
            if (ReferenceEquals(_imageItems, value))
            {
                return;
            }

            // Track add/remove/clear so IsImagesSelected can be re-evaluated by its binding.
            if (_imageItems != null)
            {
                _imageItems.CollectionChanged -= OnImageItemsChanged;
            }

            _imageItems = value;

            if (_imageItems != null)
            {
                _imageItems.CollectionChanged += OnImageItemsChanged;
            }

            OnPropertyChanged();
            OnPropertyChanged(nameof(IsImagesSelected));
        }
    }

    /// <summary>True while a recognition run is in progress.</summary>
    public bool IsProcessing
    {
        get => _isProcessing;
        set
        {
            if (_isProcessing != value)
            {
                _isProcessing = value;
                OnPropertyChanged();
            }
        }
    }

    /// <summary>Progress of the current run, 0-100. The progress bar is bound to this value.</summary>
    public int Progress
    {
        get => _progress;
        set
        {
            if (_progress != value)
            {
                _progress = value;
                OnPropertyChanged();
            }
        }
    }

    /// <summary>True when at least one loaded item has a file path.</summary>
    public bool IsImagesSelected =>
        _imageItems != null && _imageItems.Any(item => !string.IsNullOrEmpty(item.FilePath));

    public MainWindow()
    {
        // Create the collection first so no binding ever observes a null ImageItems.
        ImageItems = new ObservableCollection<ImageItem>();
        InitializeComponent();
        DataContext = this;
    }

    /// <summary>Lets the user pick a folder and loads its images into the list.</summary>
    private void SelectFolder_Click(object sender, RoutedEventArgs e)
    {
        // Replacing the list in the middle of a run would desynchronise list and progress.
        if (IsProcessing)
        {
            Log("正在识别中,请稍后再选择文件夹。");
            return;
        }

        using (var folderDialog = new System.Windows.Forms.FolderBrowserDialog())
        {
            if (folderDialog.ShowDialog() == System.Windows.Forms.DialogResult.OK)
            {
                LoadImages(folderDialog.SelectedPath);
            }
        }
    }

    /// <summary>Handler for the "退出" menu item declared in the XAML.</summary>
    private void Exit_Click(object sender, RoutedEventArgs e)
    {
        Close();
    }

    /// <summary>
    /// Replaces the current list with every .png/.jpg/.jpeg/.bmp file found
    /// under <paramref name="folderPath"/>, searching sub-folders as well.
    /// </summary>
    private void LoadImages(string folderPath)
    {
        ImageItems.Clear();

        string[] files;
        try
        {
            files = Directory.EnumerateFiles(folderPath, "*.*", SearchOption.AllDirectories)
                .Where(IsSupportedImage)
                .ToArray();
        }
        catch (Exception ex) when (ex is UnauthorizedAccessException || ex is IOException)
        {
            // An unreadable sub-folder must not take the whole application down.
            Log("读取文件夹失败: " + ex.Message);
            return;
        }

        foreach (var file in files)
        {
            ImageItems.Add(new ImageItem
            {
                FilePath = file,
                FileName = Path.GetFileName(file),
                Thumbnail = GenerateThumbnail(file)
            });
        }
    }

    // True when the path ends with one of the supported image extensions (case-insensitive).
    private static bool IsSupportedImage(string path)
    {
        return SupportedExtensions.Any(ext => path.EndsWith(ext, StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>
    /// Loads a down-scaled, frozen bitmap for the list view.
    /// Returns null when the file cannot be read or decoded.
    /// </summary>
    private System.Windows.Media.Imaging.BitmapImage GenerateThumbnail(string filePath)
    {
        try
        {
            using (var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read))
            {
                var bitmap = new System.Windows.Media.Imaging.BitmapImage();
                bitmap.BeginInit();
                bitmap.StreamSource = stream;
                // OnLoad reads the whole image during EndInit, so the stream can be closed afterwards.
                bitmap.CacheOption = System.Windows.Media.Imaging.BitmapCacheOption.OnLoad;
                bitmap.DecodePixelWidth = ThumbnailDecodeWidth;
                bitmap.EndInit();
                bitmap.Freeze(); // Frozen bitmaps may be used from threads other than the creating one.
                return bitmap;
            }
        }
        catch (Exception)
        {
            // Best effort: a missing thumbnail only leaves the cell empty.
            return null;
        }
    }

    /// <summary>
    /// Runs OCR over every loaded image, one at a time, and shows a preview
    /// (first 100 characters) of the recognised text for each item.
    /// </summary>
    private async void StartRecognition_Click(object sender, RoutedEventArgs e)
    {
        // Ignore clicks while a run is already in progress.
        if (IsProcessing)
        {
            return;
        }

        if (string.IsNullOrEmpty(Properties.Settings.Default.TencentSecretId) ||
            string.IsNullOrEmpty(Properties.Settings.Default.TencentSecretKey))
        {
            MessageBox.Show("请先配置腾讯云的SecretId和SecretKey!");
            return;
        }

        IsProcessing = true;
        Progress = 0;
        try
        {
            // TencentOcrClient is declared in the ImageOCRRenamer.Services namespace.
            var ocrClient = new Services.TencentOcrClient(
                Properties.Settings.Default.TencentSecretId,
                Properties.Settings.Default.TencentSecretKey);

            // Snapshot: the loop awaits, so the live collection must not be enumerated directly.
            var pending = ImageItems.ToList();
            int total = pending.Count;
            int processed = 0;

            foreach (var item in pending)
            {
                try
                {
                    string result = (await ocrClient.GeneralBasicOCR(item.FilePath)) ?? string.Empty;
                    item.OcrResult = result.Length > 100 ? result.Substring(0, 100) + "..." : result;
                    item.Status = "识别完成";
                    // Extension point: derive a new file name from `result` and rename the file here.
                }
                catch (Exception ex)
                {
                    item.Status = "识别失败: " + ex.Message;
                    Log("识别失败: " + item.FileName + " - " + ex.Message);
                }
                finally
                {
                    processed++;
                    Progress = (int)(((double)processed / total) * 100);
                }
            }

            Log("所有图片已处理完成!");
        }
        catch (Exception ex)
        {
            // async void: anything escaping from here would crash the application.
            Log("识别过程中止: " + ex.Message);
        }
        finally
        {
            IsProcessing = false;
        }
    }

    /// <summary>Appends one line to the log box and scrolls it to the end.</summary>
    private void Log(string message)
    {
        Dispatcher.Invoke(() =>
        {
            LogTextBox.AppendText(message + Environment.NewLine);
            LogTextBox.ScrollToEnd();
        });
    }

    // Re-evaluates IsImagesSelected whenever items are added, removed or cleared.
    private void OnImageItemsChanged(object sender, System.Collections.Specialized.NotifyCollectionChangedEventArgs e)
    {
        OnPropertyChanged(nameof(IsImagesSelected));
    }

    public event PropertyChangedEventHandler PropertyChanged;

    /// <summary>Raises <see cref="PropertyChanged"/> for the calling property, or for <paramref name="name"/>.</summary>
    protected void OnPropertyChanged([CallerMemberName] string name = null)
    {
        PropertyChanged?.Invoke(this, new PropertyChangedEventArgs(name));
    }
}
/// <summary>
/// One row of the image list: the file on disk, its thumbnail, and the
/// status and text produced by OCR. Every setter raises
/// <see cref="PropertyChanged"/>, including when the value is unchanged.
/// </summary>
public class ImageItem : INotifyPropertyChanged
{
    private string _filePath;
    private string _fileName;
    private string _ocrResult;
    private string _status;
    private System.Windows.Media.Imaging.BitmapImage _thumbnail;

    public event PropertyChangedEventHandler PropertyChanged;

    /// <summary>Full path of the image file.</summary>
    public string FilePath
    {
        get { return _filePath; }
        set { Store(ref _filePath, value); }
    }

    /// <summary>File name shown in the list.</summary>
    public string FileName
    {
        get { return _fileName; }
        set { Store(ref _fileName, value); }
    }

    /// <summary>Recognised text shown in the list.</summary>
    public string OcrResult
    {
        get { return _ocrResult; }
        set { Store(ref _ocrResult, value); }
    }

    /// <summary>Recognition status shown in the list.</summary>
    public string Status
    {
        get { return _status; }
        set { Store(ref _status, value); }
    }

    /// <summary>Thumbnail shown in the list.</summary>
    public System.Windows.Media.Imaging.BitmapImage Thumbnail
    {
        get { return _thumbnail; }
        set { Store(ref _thumbnail, value); }
    }

    /// <summary>Raises <see cref="PropertyChanged"/> for the calling property, or for <paramref name="name"/>.</summary>
    protected void OnPropertyChanged([CallerMemberName] string name = null)
    {
        var handler = PropertyChanged;
        if (handler != null)
        {
            handler(this, new PropertyChangedEventArgs(name));
        }
    }

    // Writes the backing field, then always notifies (no equality short-circuit).
    private void Store<T>(ref T slot, T value, [CallerMemberName] string name = null)
    {
        slot = value;
        OnPropertyChanged(name);
    }
}
}
using Newtonsoft.Json.Linq;
using System;
using System.IO;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
using TencentCloud.Common;
using TencentCloud.Common.Profile;
using TencentCloud.Ocr.V20181119;
using TencentCloud.Ocr.V20181119.Models;
namespace ImageOCRRenamer.Services
{
/// <summary>
/// Thin wrapper around the Tencent Cloud OCR SDK client that recognises the
/// text in a local image file via the GeneralBasicOCR API.
/// </summary>
public class TencentOcrClient
{
    private readonly OcrClient _client;

    /// <summary>Creates a client for the "ap-guangzhou" region.</summary>
    /// <param name="secretId">Tencent Cloud API SecretId.</param>
    /// <param name="secretKey">Tencent Cloud API SecretKey.</param>
    /// <exception cref="Exception">Client construction failed; the cause is kept as the inner exception.</exception>
    public TencentOcrClient(string secretId, string secretKey)
    {
        try
        {
            // Object initializer, as in the official SDK samples.
            var cred = new Credential { SecretId = secretId, SecretKey = secretKey };
            var httpProfile = new HttpProfile { Endpoint = "ocr.tencentcloudapi.com" };
            var clientProfile = new ClientProfile { HttpProfile = httpProfile };
            _client = new OcrClient(cred, "ap-guangzhou", clientProfile);
        }
        catch (Exception ex)
        {
            // Keep the original exception so its stack trace is not lost.
            throw new Exception("初始化腾讯OCR客户端失败: " + ex.Message, ex);
        }
    }

    /// <summary>
    /// Sends the image at <paramref name="imagePath"/> to GeneralBasicOCR and
    /// returns the detected text, one detection per line.
    /// </summary>
    /// <param name="imagePath">Path of the image file to recognise.</param>
    /// <returns>The detected lines joined with line breaks; empty when nothing was detected.</returns>
    /// <exception cref="Exception">Reading the file or calling the service failed; the cause is kept as the inner exception.</exception>
    public async Task<string> GeneralBasicOCR(string imagePath)
    {
        try
        {
            byte[] imageBytes = await ReadAllBytesAsync(imagePath).ConfigureAwait(false);
            var req = new GeneralBasicOCRRequest
            {
                ImageBase64 = Convert.ToBase64String(imageBytes)
            };
            var resp = await _client.GeneralBasicOCR(req).ConfigureAwait(false);
            return FormatOCRResult(resp.TextDetections);
        }
        catch (Exception ex)
        {
            throw new Exception("OCR识别失败: " + ex.Message, ex);
        }
    }

    // Reads the whole file. A single ReadAsync call may return fewer bytes than
    // requested, so keep reading until the buffer is full.
    private static async Task<byte[]> ReadAllBytesAsync(string path)
    {
        using (var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, useAsync: true))
        {
            var buffer = new byte[stream.Length];
            int offset = 0;
            while (offset < buffer.Length)
            {
                int read = await stream.ReadAsync(buffer, offset, buffer.Length - offset).ConfigureAwait(false);
                if (read == 0)
                {
                    throw new EndOfStreamException("Unexpected end of file: " + path);
                }

                offset += read;
            }

            return buffer;
        }
    }

    // Joins the DetectedText of every detection, one per line. The SDK exposes the
    // detections as a typed TextDetection[] (not a JSON array).
    private static string FormatOCRResult(TextDetection[] detections)
    {
        var result = new StringBuilder();
        if (detections == null)
        {
            return result.ToString();
        }

        foreach (var detection in detections)
        {
            if (detection != null && detection.DetectedText != null)
            {
                result.AppendLine(detection.DetectedText);
            }
        }

        return result.ToString();
    }
}
}
注意:上述 TencentOcrClient 类简化了腾讯OCR API的调用过程。实际项目中,建议参考腾讯云官方文档和SDK,处理更多的返回字段和错误情况。
在 Properties 文件夹下创建 Settings.settings,添加腾讯云的 SecretId 和 SecretKey 两项设置(名称须与代码中读取的属性名一致):
TencentSecretId (string)
TencentSecretKey (string)
并在代码中读取这些设置:
Properties.Settings.Default.TencentSecretId
Properties.Settings.Default.TencentSecretKey
在 StartRecognition_Click 方法中,添加根据OCR结果重命名文件的逻辑。例如,提取识别到的第一行文本作为新文件名:
/// <summary>
/// Runs OCR over every loaded image and renames each file after the first
/// recognised line of text. The original extension is kept, and a numeric
/// suffix (_1, _2, ...) is appended when the target name is already taken.
/// </summary>
private async void StartRecognition_Click(object sender, RoutedEventArgs e)
{
    // Ignore clicks while a run is already in progress.
    if (IsProcessing)
    {
        return;
    }

    if (string.IsNullOrEmpty(Properties.Settings.Default.TencentSecretId) ||
        string.IsNullOrEmpty(Properties.Settings.Default.TencentSecretKey))
    {
        MessageBox.Show("请先配置腾讯云的SecretId和SecretKey!");
        return;
    }

    IsProcessing = true;
    Progress = 0;
    try
    {
        // TencentOcrClient is declared in the ImageOCRRenamer.Services namespace.
        var ocrClient = new Services.TencentOcrClient(
            Properties.Settings.Default.TencentSecretId,
            Properties.Settings.Default.TencentSecretKey);

        // Snapshot: the loop awaits, so the live collection must not be enumerated directly.
        var pending = ImageItems.ToList();
        int total = pending.Count;
        int processed = 0;

        foreach (var item in pending)
        {
            try
            {
                string result = (await ocrClient.GeneralBasicOCR(item.FilePath)) ?? string.Empty;
                item.OcrResult = result.Length > 100 ? result.Substring(0, 100) + "..." : result;

                // The first non-empty line becomes the new file name. Split on both CR and LF
                // because the OCR client joins its lines with AppendLine.
                string firstLine = result
                    .Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
                    .FirstOrDefault();
                string baseName = string.IsNullOrEmpty(firstLine)
                    ? string.Empty
                    : SanitizeFileName(firstLine).Trim();

                // Nothing usable left after sanitising: keep the current name instead of
                // producing a file that consists of the extension only.
                if (baseName.Length > 0)
                {
                    string directory = Path.GetDirectoryName(item.FilePath);
                    string extension = Path.GetExtension(item.FilePath);
                    string newPath = Path.Combine(directory, baseName + extension);

                    // Already carries the target name: no rename, and no needless "_1" suffix.
                    if (!string.Equals(newPath, item.FilePath, StringComparison.OrdinalIgnoreCase))
                    {
                        int counter = 1;
                        while (File.Exists(newPath))
                        {
                            newPath = Path.Combine(directory, $"{baseName}_{counter}{extension}");
                            counter++;
                        }

                        File.Move(item.FilePath, newPath);
                        item.FilePath = newPath;
                        item.FileName = Path.GetFileName(newPath);
                    }
                }

                item.Status = "识别完成";
            }
            catch (Exception ex)
            {
                item.Status = "识别失败: " + ex.Message;
                Log("识别失败: " + item.FileName + " - " + ex.Message);
            }
            finally
            {
                processed++;
                Progress = (int)(((double)processed / total) * 100);
            }
        }

        Log("所有图片已处理完成!");
    }
    catch (Exception ex)
    {
        // async void: anything escaping from here would crash the application.
        Log("识别过程中止: " + ex.Message);
    }
    finally
    {
        IsProcessing = false;
    }
}
/// <summary>
/// Removes every character that is not allowed in a file name, then trims
/// surrounding whitespace and trailing dots (Windows does not keep them at
/// the end of a name).
/// </summary>
/// <param name="input">Raw text, e.g. a line of OCR output. May be null.</param>
/// <returns>The cleaned name; empty when <paramref name="input"/> is null or nothing valid remains.</returns>
private string SanitizeFileName(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return string.Empty;
    }

    // GetInvalidFileNameChars returns a fresh array on every call, so fetch it once
    // rather than once per input character.
    char[] invalidChars = Path.GetInvalidFileNameChars();
    var cleaned = new string(input.Where(c => Array.IndexOf(invalidChars, c) < 0).ToArray());
    return cleaned.Trim().TrimEnd('.', ' ');
}
注意:OCR识别出的文本可能包含文件名中不允许出现的字符,因此在重命名之前需要先通过 SanitizeFileName 方法进行过滤。
本项目基于WPF和腾讯OCR技术,实现了一个批量图片文字识别与重命名的桌面应用程序。通过以下步骤,完成了从需求分析到功能实现的全过程:
使用 async 和 await 关键字,确保UI界面的响应性,避免因网络请求导致的界面卡顿。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。