文章/答案/技术大牛

发布

社区首页 >问答首页 >我是零基础用户，想请教大神，我这个代码在Python跑完没有将腾讯云的OCR识别结果写入导出的表格里面是什么问题，我想用来批量识别微信支付的订单截图，谢谢！？

问我是零基础用户，想请教大神，我这个代码在Python跑完没有将腾讯云的OCR识别结果写入导出的表格里面是什么问题，我想用来批量识别微信支付的订单截图，谢谢！？

提问于 2024-10-18 18:32:43

回答 0关注 0查看 16

import json
import base64
import logging
import os
import pandas as pd
from tencentcloud.common import credential
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
from tencentcloud.ocr.v20181119 import ocr_client, models

# 腾讯云OCR服务的密钥和区域
secret_id = "***"
secret_key = "***"
region = "ap-guangzhou"

# 图片文件夹路径
image_folder_path = "D:/报销"

# 实例化一个认证对象
cred = credential.Credential(secret_id, secret_key)

# 实例化一个http选项
httpProfile = HttpProfile()
httpProfile.endpoint = "ocr.tencentcloudapi.com"

# 实例化一个client选项
clientProfile = ClientProfile()
clientProfile.httpProfile = httpProfile

# 实例化OCR client对象
client = ocr_client.OcrClient(cred, region, clientProfile)

# 创建一个空的DataFrame来存储结果
results_df = pd.DataFrame(columns=["文件名", "金额", "支付时间", "商户全称", "交易单号"])

# 遍历文件夹中的图片文件
for filename in os.listdir(image_folder_path):
    if filename.lower().endswith((".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif")):
        file_path = os.path.join(image_folder_path, filename)
        logging.info(f"正在处理文件: {filename}")

        try:
            # 读取并编码图片为Base64字符串
            with open(file_path, "rb") as image_file:
                image_base64 = base64.b64encode(image_file.read()).decode('utf-8')

                # 实例化一个请求对象并设置请求参数
            req = models.SmartStructuralOCRV2Request()
            req.ItemNames = ["金额", "支付时间", "商户全称", "交易单号"]
            req.ImageBase64 = image_base64

            # 发送请求并获取响应
            resp = client.SmartStructuralOCRV2(req)

            # 解析响应结果
            response_data = json.loads(resp.to_json_string())
            ocr_data = response_data.get('Data', {}).get('Items', [])

            if ocr_data and isinstance(ocr_data, list) and len(ocr_data) > 0:
                result_item = ocr_data[0]  # 这里只取第一个结果作为示例
                result_dict = {key: result_item.get(key, '') for key in ["金额", "支付时间", "商户全称", "交易单号"]}
                result_dict["文件名"] = filename
                results_df = results_df.append(result_dict, ignore_index=True)

        except TencentCloudSDKException as err:
            logging.error(f"处理文件 {filename} 时出错: {err}")
        except Exception as e:
            logging.error(f"处理文件 {filename} 时发生意外错误: {e}")

        # 将结果DataFrame保存为Excel文件
desktop_path = os.path.join(os.path.expanduser('~'), 'Desktop')
output_file_path = os.path.join(desktop_path, "OCR_Results.xlsx")
results_df.to_excel(output_file_path, index=False)
logging.info(f"OCR结果已保存至 {output_file_path}")

文字识别