import base64
import csv
import io
import mimetypes
import os
import time
import tkinter as tk
from tkinter import ttk, filedialog, messagebox

from openpyxl import Workbook


class OCRApp:
    """Tkinter front end that sends table images to Qwen-VL OCR and saves the result as Excel.

    Two modes are offered: a single image converted to one ``.xlsx`` file, or
    every image in a folder converted to one ``.xlsx`` per image.
    """

    _BG = "#f0f0f0"
    _UI_FONT = ("微软雅黑", 10)
    _MONO_FONT = ("Consolas", 10)
    # Shared geometry for every full-width row so toggled rows look identical.
    _ROW_PACK = {"pady": 5, "fill": "x", "padx": 20}
    _IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

    def __init__(self, root):
        """Build the whole window inside *root* (a ``tk.Tk`` instance)."""
        self.root = root
        self.root.title("Qwen-VL OCR 表格识别工具 v3.0")
        self.root.geometry("700x500")
        self.root.resizable(False, False)
        self.root.configure(bg=self._BG)

        style = ttk.Style()
        style.theme_use('clam')
        style.configure("TButton", font=self._UI_FONT, padding=5)
        style.configure("TLabel", font=self._UI_FONT, background=self._BG)
        style.configure("TEntry", font=self._UI_FONT, padding=3)
        style.configure("TProgressbar", thickness=8)
        style.configure("Green.TButton", background="#2ecc71",
                        foreground="white", font=("微软雅黑", 12, "bold"))
        style.configure("Radio.TRadiobutton", background=self._BG,
                        font=self._UI_FONT)

        title_label = tk.Label(root, text="🔍 Qwen-VL OCR 表格识别工具",
                               font=("微软雅黑", 14, "bold"),
                               bg=self._BG, fg="#2c3e50")
        title_label.pack(pady=10)

        # --- processing mode selector ---
        frame_mode = tk.Frame(root, bg=self._BG)
        frame_mode.pack(**self._ROW_PACK)
        tk.Label(frame_mode, text="📋 处理模式:", font=self._UI_FONT,
                 bg=self._BG).pack(side="left")
        self.process_mode = tk.StringVar(value="single")
        ttk.Radiobutton(frame_mode, text="单文件处理",
                        variable=self.process_mode, value="single",
                        style="Radio.TRadiobutton",
                        command=self.toggle_process_mode).pack(side="left", padx=10)
        ttk.Radiobutton(frame_mode, text="批量文件夹处理",
                        variable=self.process_mode, value="batch",
                        style="Radio.TRadiobutton",
                        command=self.toggle_process_mode).pack(side="left")

        # --- API key ---
        frame_api = tk.Frame(root, bg=self._BG)
        frame_api.pack(**self._ROW_PACK)
        tk.Label(frame_api, text="🔑 API Key:", font=self._UI_FONT,
                 bg=self._BG).pack(side="left")
        self.api_key_entry = tk.Entry(frame_api, width=40, show="*",
                                      font=self._MONO_FONT)
        self.api_key_entry.pack(side="left", padx=10, expand=True, fill="x")

        # --- single-file rows (visible at start) ---
        (self.frame_single_image,
         self.image_path_var,
         self.image_entry) = self._add_path_row(
            "🖼️ 图片路径:", "📂 选择图片", self.select_image)
        (self.frame_single_output,
         self.output_path_var,
         self.output_entry) = self._add_path_row(
            "💾 输出路径:", "📁 选择输出", self.select_output)

        # --- batch rows (hidden until batch mode is selected) ---
        (self.frame_batch_folder,
         self.folder_path_var,
         self.folder_entry) = self._add_path_row(
            "📂 图片文件夹:", "📁 选择文件夹", self.select_folder, visible=False)
        (self.frame_batch_output,
         self.batch_output_var,
         self.batch_output_entry) = self._add_path_row(
            "📁 输出文件夹:", "📁 选择输出", self.select_batch_output,
            visible=False)

        # --- progress area; kept on self so toggled rows can be packed before it ---
        self.frame_progress = tk.Frame(root, bg=self._BG)
        self.frame_progress.pack(pady=10, fill="x", padx=20)
        self.progress_label = tk.Label(self.frame_progress, text="准备就绪...",
                                       font=("微软雅黑", 9), bg=self._BG,
                                       fg="#7f8c8d")
        self.progress_label.pack(side="top", anchor="w")
        self.progress_bar = ttk.Progressbar(self.frame_progress, length=500,
                                            mode='determinate')
        self.progress_bar.pack(side="top", pady=5)

        # --- start button ---
        start_frame = tk.Frame(root, bg=self._BG)
        start_frame.pack(pady=15)
        self.start_button = ttk.Button(start_frame, text="🚀 开始识别",
                                       command=self.start_processing, width=20)
        self.start_button.pack()
        self.start_button.configure(style="Green.TButton")

        self.status_label = tk.Label(root, text="", font=self._UI_FONT,
                                     bg=self._BG, fg="#2c3e50")
        self.status_label.pack(pady=10)

    def _add_path_row(self, label_text, button_text, command, *, visible=True):
        """Create one "label + read-only entry + browse button" row.

        Returns ``(frame, string_var, entry)``. When *visible* is false the
        frame is created but not packed.
        """
        frame = tk.Frame(self.root, bg=self._BG)
        if visible:
            frame.pack(**self._ROW_PACK)
        tk.Label(frame, text=label_text, font=self._UI_FONT,
                 bg=self._BG).pack(side="left")
        path_var = tk.StringVar()
        entry = tk.Entry(frame, textvariable=path_var, width=30,
                         font=self._MONO_FONT, state='readonly')
        entry.pack(side="left", padx=5, expand=True, fill="x")
        ttk.Button(frame, text=button_text,
                   command=command).pack(side="left", padx=5)
        return frame, path_var, entry

    def toggle_process_mode(self):
        """Show the path rows for the selected mode and hide the other pair."""
        single_rows = (self.frame_single_image, self.frame_single_output)
        batch_rows = (self.frame_batch_folder, self.frame_batch_output)
        if self.process_mode.get() == "single":
            to_show, to_hide = single_rows, batch_rows
        else:
            to_show, to_hide = batch_rows, single_rows

        for frame in to_hide:
            frame.pack_forget()
        # A plain pack() appends at the end of the packing order, which would
        # put the rows below the status label; anchor them above the progress
        # area instead so they reappear where they started.
        for frame in to_show:
            frame.pack(before=self.frame_progress, **self._ROW_PACK)

    def select_image(self):
        """Ask for an input image and pre-fill the output path next to it."""
        path = filedialog.askopenfilename(
            title="选择表格图片",
            filetypes=[("Image Files", "*.png *.jpg *.jpeg *.gif *.bmp")]
        )
        if path:
            self.image_path_var.set(path)
            # Default output: same directory and stem, ".xlsx" extension.
            self.output_path_var.set(os.path.splitext(path)[0] + ".xlsx")

    def select_output(self):
        """Ask where the single-file Excel output should be written."""
        path = filedialog.asksaveasfilename(
            title="选择输出 Excel 文件",
            defaultextension=".xlsx",
            filetypes=[("Excel 文件", "*.xlsx"), ("所有文件", "*.*")]
        )
        if path:
            self.output_path_var.set(path)

    def select_folder(self):
        """Ask for the batch input folder; the output folder defaults to the same path."""
        path = filedialog.askdirectory(title="选择图片文件夹")
        if path:
            self.folder_path_var.set(path)
            self.batch_output_var.set(path)

    def select_batch_output(self):
        """Ask for the folder that receives the batch Excel files."""
        path = filedialog.askdirectory(title="选择批量输出文件夹")
        if path:
            self.batch_output_var.set(path)

    def image_to_base64(self, image_path):
        """Return the contents of *image_path* as a base64-encoded ``str``."""
        with open(image_path, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")

    def update_progress(self, value, message):
        """Set the progress bar to *value* and the progress label to *message*."""
        self.progress_bar['value'] = value
        self.progress_label.config(text=message)
        # Processing runs on the Tk thread, so force a redraw now.
        self.root.update_idletasks()

    @staticmethod
    def _guess_image_mime(image_path):
        """Return the image MIME type implied by the file name, defaulting to PNG."""
        mime, _ = mimetypes.guess_type(image_path)
        if mime and mime.startswith("image/"):
            return mime
        return "image/png"

    @staticmethod
    def _strip_code_fence(text):
        """Remove a surrounding Markdown code fence (``` or ```csv) if present."""
        lines = text.strip().splitlines()
        if lines and lines[0].lstrip().startswith("```"):
            lines = lines[1:]
            if lines and lines[-1].strip().startswith("```"):
                lines = lines[:-1]
        return "\n".join(lines)

    def process_single_file(self, api_key, image_path, output_path, *,
                            show_progress=True):
        """Recognise the table in one image and save it as an Excel file.

        Args:
            api_key: DashScope API key passed to the OpenAI-compatible client.
            image_path: Path of the image to recognise.
            output_path: Path of the ``.xlsx`` file to write.
            show_progress: When true (default) the per-step progress values
                30/50/80 are pushed to the progress bar. Batch processing
                passes ``False`` so these do not overwrite its overall progress.

        Returns:
            ``(ok, message)`` where *ok* is a bool and *message* is the
            user-facing result text. Exceptions are not propagated; any
            failure is reported through the returned tuple.
        """
        def report(value, message):
            if show_progress:
                self.update_progress(value, message)

        try:
            # NOTE(review): short pause before each request, presumably to pace
            # API calls in batch mode — confirm before removing.
            time.sleep(0.2)
            # Imported lazily so the window can open without the package.
            from openai import OpenAI
            client = OpenAI(
                api_key=api_key,
                base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"
            )

            report(30, "正在读取图片...")
            image_b64 = self.image_to_base64(image_path)
            # Declare the real image type instead of always claiming PNG.
            mime_type = self._guess_image_mime(image_path)

            report(50, "正在调用模型识别...")
            messages = [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{image_b64}"
                            }
                        },
                        {
                            "type": "text",
                            "text": (
                                "请识别这张图片中的表格内容,并以标准 CSV 格式输出,包含表头。\n"
                                "不要添加任何解释、前缀或后缀,只输出纯 CSV 内容。\n"
                                "例如:\n姓名,年龄,城市\n张三,25,北京\n李四,30,上海"
                            )
                        }
                    ]
                }
            ]
            completion = client.chat.completions.create(
                model="qwen-vl-ocr-2025-04-13",
                messages=messages,
                max_tokens=4096,
                temperature=0.0
            )
            # content may be None; treat that as an empty reply so the
            # "no valid data" error below is raised instead of AttributeError.
            csv_text = (completion.choices[0].message.content or "").strip()

            report(80, "正在保存为 Excel...")
            self.save_csv_to_excel(csv_text, output_path)
            return True, f"成功处理: {os.path.basename(image_path)}"
        except Exception as e:
            return False, f"处理 {os.path.basename(image_path)} 失败: {str(e)}"

    def save_csv_to_excel(self, csv_text, output_path):
        """Parse *csv_text* as CSV and write the rows to an Excel workbook.

        A surrounding Markdown code fence is removed first, and fields are
        parsed with the ``csv`` module so quoted values containing commas stay
        in one cell. Completely empty lines are skipped.

        Raises:
            ValueError: If the text contains no data rows.
        """
        cleaned = self._strip_code_fence(csv_text or "")
        # newline="" lets csv.reader handle line breaks inside quoted fields.
        reader = csv.reader(io.StringIO(cleaned, newline=""))
        rows = [row for row in reader if row]
        if not rows:
            raise ValueError("模型未返回有效数据")

        wb = Workbook()
        ws = wb.active
        for row in rows:
            ws.append(row)
        wb.save(output_path)

    def start_processing(self):
        """Validate the inputs and run single-file or batch processing."""
        api_key = self.api_key_entry.get().strip()
        if not api_key:
            messagebox.showerror("错误", "请先输入 API Key!")
            return

        try:
            if self.process_mode.get() == "single":
                self._run_single(api_key)
            else:
                self._run_batch(api_key)
        except Exception as e:
            # Top-level UI boundary: surface anything unexpected to the user.
            self.update_progress(0, f"❌ 错误: {str(e)}")
            messagebox.showerror("错误", f"处理失败:\n{str(e)}")

    def _run_single(self, api_key):
        """Process the one selected image and report the outcome."""
        image_path = self.image_path_var.get()
        output_path = self.output_path_var.get()
        if not image_path:
            messagebox.showerror("错误", "请选择图片文件!")
            return
        if not output_path:
            messagebox.showerror("错误", "请选择输出文件路径!")
            return

        # Only show "initialising" once validation has passed, so a rejected
        # input does not leave the bar stuck at 10 %.
        self.update_progress(10, "正在初始化...")
        success, message = self.process_single_file(
            api_key, image_path, output_path)
        self.update_progress(100, message)
        self.status_label.config(text=message)
        if success:
            messagebox.showinfo("成功", f"表格已保存至:\n{output_path}")
        else:
            messagebox.showerror("错误", message)

    def _run_batch(self, api_key):
        """Process every image in the selected folder and show a summary."""
        folder_path = self.folder_path_var.get()
        output_folder = self.batch_output_var.get()
        if not folder_path:
            messagebox.showerror("错误", "请选择图片文件夹!")
            return
        if not output_folder:
            messagebox.showerror("错误", "请选择输出文件夹!")
            return

        # Sorted so files are processed in a predictable order.
        image_files = sorted(
            name for name in os.listdir(folder_path)
            if name.lower().endswith(self._IMAGE_EXTENSIONS)
            and os.path.isfile(os.path.join(folder_path, name))
        )
        if not image_files:
            messagebox.showinfo("提示", "所选文件夹中没有图片文件!")
            self.update_progress(0, "准备就绪...")
            return

        self.update_progress(10, "正在初始化...")
        total_files = len(image_files)
        success_count = 0
        error_messages = []

        for i, filename in enumerate(image_files, 1):
            try:
                # Bar shows the share of files already finished (10 %..90 %).
                overall_progress = 10 + ((i - 1) / total_files) * 80
                self.update_progress(
                    overall_progress,
                    f"正在处理 {i}/{total_files}: {filename}")

                image_path = os.path.join(folder_path, filename)
                base_name = os.path.splitext(filename)[0]
                output_path = os.path.join(output_folder, f"{base_name}.xlsx")

                success, msg = self.process_single_file(
                    api_key, image_path, output_path, show_progress=False)
                if success:
                    success_count += 1
                else:
                    error_messages.append(msg)
            except Exception as e:
                # Keep going: one bad file must not abort the whole batch.
                error_messages.append(f"处理 {filename} 时出错: {str(e)}")

        summary = f"批量处理完成: {success_count}/{total_files} 成功"
        self.update_progress(100, summary)
        self.status_label.config(text=summary)

        result_msg = (
            f"批量处理完成!\n成功: {success_count} 个文件\n"
            f"失败: {total_files - success_count} 个文件"
        )
        if error_messages:
            # Show at most five errors so the dialog stays readable.
            result_msg += "\n\n错误详情:\n" + "\n".join(error_messages[:5])
            if len(error_messages) > 5:
                result_msg += f"\n... 还有 {len(error_messages) - 5} 个错误"
        messagebox.showinfo("批量处理完成", result_msg)


if __name__ == "__main__":
    root = tk.Tk()
    app = OCRApp(root)
    root.mainloop()