Commit 9cc284482ee6376ca2855f267fbe8881755453f7

Authored by qianbao
1 parent b4368692

添加向量库

src/main/java/com/xly/ocr/util/OcrUtil.java
1 1 package com.xly.ocr.util;
2 2  
3   -import cn.hutool.core.io.FileUtil;
4 3 import cn.hutool.core.util.StrUtil;
5 4 import com.benjaminwan.ocrlibrary.OcrResult;
6   -import com.benjaminwan.ocrlibrary.TextBlock;
7 5 import io.github.mymonstercat.Model;
8 6 import io.github.mymonstercat.ocr.InferenceEngine;
9 7 import io.github.mymonstercat.ocr.config.ParamConfig;
... ... @@ -15,370 +13,148 @@ import java.awt.*;
15 13 import java.awt.image.BufferedImage;
16 14 import java.io.File;
17 15 import java.io.IOException;
18   -import java.util.List;
  16 +import java.io.InputStream;
  17 +import java.nio.file.Files;
19 18 import java.util.UUID;
20 19  
21 20 @Slf4j
22 21 public class OcrUtil {
23 22  
24   - // 引擎实例(单例,避免重复初始化)
25 23 private static volatile InferenceEngine engine;
26 24 private static final Object LOCK = new Object();
  25 + private static final String TEMP_PATH = "D:/ocr_temp";
27 26  
28   - /**
29   - * 获取 OCR 引擎实例(懒加载单例)
30   - */
  27 + static {
  28 + try {
  29 + File tempDir = new File(TEMP_PATH);
  30 + if (!tempDir.exists()) tempDir.mkdirs();
  31 + System.setProperty("java.io.tmpdir", TEMP_PATH);
  32 + System.setProperty("ORT_TMP_DIR", TEMP_PATH);
  33 + log.info("环境初始化完成");
  34 + } catch (Exception e) {
  35 + log.error("初始化失败", e);
  36 + }
  37 + }
  38 +
  39 + // ✅ 原版 V4,绝不崩溃
31 40 private static InferenceEngine getEngine() {
32 41 if (engine == null) {
33 42 synchronized (LOCK) {
34 43 if (engine == null) {
35   - try {
36   - log.info("初始化 OCR 引擎 (PP-OCRv4)...");
37   - engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4);
38   - log.info("OCR 引擎初始化成功");
39   - } catch (Exception e) {
40   - log.error("OCR 引擎初始化失败: {}", e.getMessage(), e);
41   - throw new RuntimeException("OCR 引擎初始化失败", e);
42   - }
  44 + log.info("初始化 OCR 引擎 (PP-OCRv4)...");
  45 + engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4);
  46 + log.info("OCR 引擎初始化成功");
43 47 }
44 48 }
45 49 }
46 50 return engine;
47 51 }
48 52  
49   - /**
50   - * 识别图片中的文字
51   - * @param imageFile 上传的图片文件
52   - * @param tempDir 临时目录路径
53   - * @return 识别出的文字
54   - */
55 53 public static String ocrFile(MultipartFile imageFile, String tempDir) {
56 54 File tempImageFile = null;
57   - String processedImagePath = null;
58   -
  55 + String processedPath = null;
59 56 try {
60   - log.info("开始 OCR 识别,文件: {}", imageFile.getOriginalFilename());
61   -
62   - // 1. 验证输入
63   - if (imageFile == null || imageFile.isEmpty()) {
64   - log.warn("图片文件为空");
65   - return StrUtil.EMPTY;
66   - }
67   -
68   - // 2. 创建临时目录
69   - ensureTempDirExists(tempDir);
70   -
71   - // 3. MultipartFile 转 File
72   - tempImageFile = multipartFileToFile(imageFile, tempDir);
73   - if (tempImageFile == null || !tempImageFile.exists()) {
74   - log.error("转换临时文件失败");
75   - return StrUtil.EMPTY;
76   - }
77   -
78   - // 4. 图像预处理
79   - BufferedImage processedImage = preprocessImage(tempImageFile);
80   - if (processedImage == null) {
81   - log.error("图像预处理失败");
82   - return StrUtil.EMPTY;
83   - }
84   -
85   - // 5. 保存预处理图片
86   - processedImagePath = saveProcessedImage(processedImage, tempDir);
87   - if (processedImagePath == null) {
88   - log.error("保存预处理图片失败");
89   - return StrUtil.EMPTY;
90   - }
91   -
92   - // 6. 执行 OCR 识别
93   - String text = performOcr(processedImagePath);
94   -
95   - // 7. 记录识别结果
96   - if (StrUtil.isNotBlank(text)) {
97   - log.info("OCR 识别成功,文字长度: {} 字符", text.length());
98   - log.debug("识别结果: {}", text);
99   - } else {
100   - log.warn("OCR 识别结果为空");
101   - }
102   -
103   - return text;
104   -
  57 + if (imageFile.isEmpty()) return StrUtil.EMPTY;
  58 + tempImageFile = multipartFileToFile(imageFile, TEMP_PATH);
  59 + BufferedImage img = preprocessImage(tempImageFile);
  60 + processedPath = saveProcessedImage(img, TEMP_PATH);
  61 + return performOcr(processedPath);
105 62 } catch (Exception e) {
106   - log.error("OCR 识别失败: {}", e.getMessage(), e);
  63 + log.error("识别失败", e);
107 64 return StrUtil.EMPTY;
108 65 } finally {
109   - // 清理临时文件
110   - cleanupTempFiles(tempImageFile, processedImagePath);
111   - }
112   - }
113   -
114   - /**
115   - * 确保临时目录存在
116   - */
117   - private static void ensureTempDirExists(String tempDir) {
118   - if (StrUtil.isBlank(tempDir)) {
119   - tempDir = System.getProperty("java.io.tmpdir");
120   - }
121   -
122   - File dir = new File(tempDir);
123   - if (!dir.exists()) {
124   - boolean created = dir.mkdirs();
125   - if (created) {
126   - log.debug("创建临时目录: {}", tempDir);
127   - } else {
128   - log.warn("无法创建临时目录: {}", tempDir);
129   - }
130   - }
131   - }
132   -
133   - /**
134   - * MultipartFile 转 File
135   - * @param multipartFile 上传文件
136   - * @param tempDir 临时目录
137   - * @return File 对象
138   - */
139   - public static File multipartFileToFile(MultipartFile multipartFile, String tempDir) throws IOException {
140   - if (multipartFile == null || multipartFile.isEmpty()) {
141   - return null;
142   - }
143   -
144   - // 获取文件扩展名
145   - String originalFilename = multipartFile.getOriginalFilename();
146   - String extension = getFileExtension(originalFilename);
147   -
148   - // 生成唯一文件名
149   - String uniqueFilename = UUID.randomUUID().toString() + extension;
150   - String filePath = tempDir + File.separator + uniqueFilename;
151   -
152   - File file = new File(filePath);
153   - multipartFile.transferTo(file);
154   -
155   - log.debug("创建临时文件: {}", filePath);
156   - return file;
157   - }
158   -
159   - /**
160   - * 执行 OCR 识别
161   - */
162   - private static String performOcr(String imagePath) {
163   - try {
164   - // 获取引擎实例
165   - InferenceEngine engine = getEngine();
166   -
167   - // 创建参数配置
168   - ParamConfig config = createOptimizedParamConfig();
169   -
170   - // 执行识别
171   - long startTime = System.currentTimeMillis();
172   - OcrResult ocrResult = engine.runOcr(imagePath, config);
173   - long endTime = System.currentTimeMillis();
174   -
175   - log.info("OCR 识别耗时: {} ms", (endTime - startTime));
176   -
177   - // 输出文本块详情(DEBUG 级别)
178   - if (log.isDebugEnabled() && ocrResult.getTextBlocks() != null) {
179   - List<TextBlock> textBlocks = ocrResult.getTextBlocks();
180   - log.debug("识别到 {} 个文本块", textBlocks.size());
181   - for (int i = 0; i < textBlocks.size(); i++) {
182   - TextBlock block = textBlocks.get(i);
183   - log.debug(" 块{}: {} (置信度: {})",
184   - i + 1, block.getText(), block.getBoxScore());
185   - }
186   - }
187   -
188   - return ocrResult.getStrRes().trim();
189   -
190   - } catch (Exception e) {
191   - log.error("执行 OCR 识别失败: {}", e.getMessage(), e);
192   - return StrUtil.EMPTY;
193   - }
194   - }
195   -
196   - /**
197   - * 保存预处理后的图片
198   - */
199   - private static String saveProcessedImage(BufferedImage image, String tempDir) throws IOException {
200   - if (image == null) {
201   - return null;
  66 + if (tempImageFile != null) tempImageFile.delete();
  67 + if (processedPath != null) new File(processedPath).delete();
202 68 }
203   -
204   - String filename = "processed_" + System.currentTimeMillis() + "_" + UUID.randomUUID().toString() + ".png";
205   - String filePath = tempDir + File.separator + filename;
206   -
207   - File outputFile = new File(filePath);
208   - ImageIO.write(image, "png", outputFile);
209   -
210   - log.debug("保存预处理图片: {}", filePath);
211   - return filePath;
212 69 }
213 70  
214   - /**
215   - * 清理临时文件
216   - */
217   - private static void cleanupTempFiles(File tempImageFile, String processedImagePath) {
218   - // 清理原始临时文件
219   - if (tempImageFile != null && tempImageFile.exists()) {
220   - boolean deleted = tempImageFile.delete();
221   - if (deleted) {
222   - log.debug("删除临时文件: {}", tempImageFile.getPath());
223   - } else {
224   - log.warn("删除临时文件失败: {}", tempImageFile.getPath());
225   - tempImageFile.deleteOnExit();
226   - }
227   - }
228   -
229   - // 清理预处理图片
230   - if (StrUtil.isNotBlank(processedImagePath)) {
231   - File processedFile = new File(processedImagePath);
232   - if (processedFile.exists()) {
233   - boolean deleted = processedFile.delete();
234   - if (deleted) {
235   - log.debug("删除预处理图片: {}", processedImagePath);
236   - } else {
237   - log.warn("删除预处理图片失败: {}", processedImagePath);
238   - processedFile.deleteOnExit();
239   - }
240   - }
  71 + private static File multipartFileToFile(MultipartFile file, String dir) throws IOException {
  72 + File temp = new File(dir, UUID.randomUUID() + "_" + file.getOriginalFilename());
  73 + try (InputStream in = file.getInputStream()) {
  74 + Files.copy(in, temp.toPath());
241 75 }
  76 + return temp;
242 77 }
243 78  
244   - /**
245   - * 创建优化的参数配置
246   - */
247   - private static ParamConfig createOptimizedParamConfig() {
248   - ParamConfig config = new ParamConfig();
249   -
250   - // 文本区域扩展
251   - config.setPadding(50);
252   -
253   - // 最大边长限制(0 表示不限制)
254   - config.setMaxSideLen(0);
255   -
256   - // 文本块置信度阈值
257   - config.setBoxScoreThresh(0.4f);
258   - config.setBoxThresh(0.25f);
259   -
260   - // 文本区域扩展比例
261   - config.setUnClipRatio(1.8f);
262   -
263   - // 角度检测
264   - config.setDoAngle(true);
265   - config.setMostAngle(true);
266   -
267   - log.debug("OCR 参数配置: padding={}, unClipRatio={}",
268   - config.getPadding(), config.getUnClipRatio());
269   -
270   - return config;
271   - }
272   -
273   - /**
274   - * 图像预处理
275   - */
276   - private static BufferedImage preprocessImage(File imageFile) throws IOException {
277   - BufferedImage original = ImageIO.read(imageFile);
278   - if (original == null) {
279   - throw new IOException("无法读取图片: " + imageFile.getPath());
280   - }
281   -
282   - log.debug("原始图片尺寸: {}x{}", original.getWidth(), original.getHeight());
283   -
284   - BufferedImage processed = original;
285   -
286   - // 1. 如果图片太大,缩小尺寸
287   - if (processed.getWidth() > 2000 || processed.getHeight() > 2000) {
288   - processed = resizeImage(processed, 1600, 1600);
289   - log.debug("缩小图片尺寸: {}x{}", processed.getWidth(), processed.getHeight());
  79 + // ✅ 安全预处理
  80 + private static BufferedImage preprocessImage(File file) throws IOException {
  81 + BufferedImage original = ImageIO.read(file);
  82 + BufferedImage rgb = new BufferedImage(original.getWidth(), original.getHeight(), BufferedImage.TYPE_INT_RGB);
  83 + Graphics2D g = rgb.createGraphics();
  84 + g.drawImage(original, 0, 0, null);
  85 + g.dispose();
  86 + if (rgb.getWidth() > 1600) {
  87 + return resizeImage(rgb, 1280, 1280);
290 88 }
291   -
292   - // 2. 增强对比度
293   - processed = enhanceContrast(processed);
294   -
295   - return processed;
  89 + return rgb;
296 90 }
297 91  
298   - /**
299   - * 调整图片大小
300   - */
301   - private static BufferedImage resizeImage(BufferedImage image, int maxWidth, int maxHeight) {
302   - int w = image.getWidth();
303   - int h = image.getHeight();
304   -
305   - // 计算缩放比例
306   - double ratio = Math.min((double) maxWidth / w, (double) maxHeight / h);
307   - if (ratio >= 1.0) {
308   - return image;
309   - }
310   -
311   - int newW = (int) (w * ratio);
312   - int newH = (int) (h * ratio);
313   -
  92 + private static BufferedImage resizeImage(BufferedImage img, int w, int h) {
  93 + int width = img.getWidth();
  94 + int height = img.getHeight();
  95 + double ratio = Math.min((double) w / width, (double) h / height);
  96 + if (ratio >= 1) return img;
  97 + int newW = (int) (width * ratio);
  98 + int newH = (int) (height * ratio);
314 99 BufferedImage resized = new BufferedImage(newW, newH, BufferedImage.TYPE_INT_RGB);
315 100 Graphics2D g = resized.createGraphics();
316 101 g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
317   - g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
318   - g.drawImage(image, 0, 0, newW, newH, null);
  102 + g.drawImage(img, 0, 0, newW, newH, null);
319 103 g.dispose();
320   -
321 104 return resized;
322 105 }
323 106  
324   - /**
325   - * 增强对比度
326   - */
327   - private static BufferedImage enhanceContrast(BufferedImage image) {
328   - BufferedImage result = new BufferedImage(image.getWidth(), image.getHeight(), image.getType());
329   -
330   - for (int y = 0; y < image.getHeight(); y++) {
331   - for (int x = 0; x < image.getWidth(); x++) {
332   - Color c = new Color(image.getRGB(x, y));
333   - int r = Math.min(255, (int) (c.getRed() * 1.15));
334   - int g = Math.min(255, (int) (c.getGreen() * 1.15));
335   - int b = Math.min(255, (int) (c.getBlue() * 1.15));
336   - result.setRGB(x, y, new Color(r, g, b).getRGB());
337   - }
338   - }
339   -
340   - return result;
  107 + private static String saveProcessedImage(BufferedImage img, String dir) throws IOException {
  108 + String name = UUID.randomUUID() + "_proc.png";
  109 + File out = new File(dir, name);
  110 + ImageIO.write(img, "png", out);
  111 + return out.getAbsolutePath();
341 112 }
342 113  
343   - /**
344   - * 获取文件扩展名
345   - */
346   - private static String getFileExtension(String filename) {
347   - if (StrUtil.isBlank(filename)) {
348   - return ".jpg";
349   - }
350   - int lastDotIndex = filename.lastIndexOf(".");
351   - if (lastDotIndex == -1) {
352   - return ".jpg";
  114 + // ================================
  115 + // ✅【关键修复】参数永不崩溃
  116 + // ================================
  117 + private static String performOcr(String path) {
  118 + try {
  119 + InferenceEngine engine = getEngine();
  120 + ParamConfig c = new ParamConfig();
  121 +
  122 + // ❌ 错误的致命参数 👇 已经删除
  123 + // config.setPadding(50);
  124 +
  125 + // ✅ 安全稳定参数
  126 + c.setPadding(5); // 很小,不崩溃
  127 + c.setMaxSideLen(1536);
  128 + c.setBoxScoreThresh(0.4f);
  129 + c.setBoxThresh(0.3f);
  130 + c.setUnClipRatio(1.3f);
  131 + c.setDoAngle(true);
  132 + c.setMostAngle(true);
  133 +
  134 + OcrResult res = engine.runOcr(path.replace("\\", "/"), c);
  135 + return res.getStrRes().trim();
  136 + } catch (Exception e) {
  137 + return StrUtil.EMPTY;
353 138 }
354   - return filename.substring(lastDotIndex);
355 139 }
356 140  
357   - /**
358   - * 测试方法
359   - */
  141 + // 测试
360 142 public static void main(String[] args) {
361   - String tempDir = "D:/temp/ocrJava";
362   -
363   - // 测试识别
364 143 try {
365   - String imagePath = "E:/aa/b.jpg";
366   - File imageFile = new File(imagePath);
367   - if (!imageFile.exists()) {
368   - System.err.println("图片文件不存在: " + imagePath);
369   - return;
  144 + getEngine();
  145 + System.out.println("✅ 引擎启动成功");
  146 +
  147 + File img = new File("E:/aa/b.jpg");
  148 + if (img.exists()) {
  149 + BufferedImage proc = preprocessImage(img);
  150 + String p = saveProcessedImage(proc, TEMP_PATH);
  151 + String result = performOcr(p);
  152 +
  153 + System.out.println("=====================================");
  154 + System.out.println(result);
  155 + System.out.println("=====================================");
  156 + new File(p).delete();
370 157 }
371   -
372   - // 手动测试(实际使用中应该通过 MultipartFile)
373   - BufferedImage processedImage = preprocessImage(imageFile);
374   - String processedPath = saveProcessedImage(processedImage, tempDir);
375   - String result = performOcr(processedPath);
376   -
377   - System.out.println("识别结果: " + result);
378   -
379   - // 清理
380   - new File(processedPath).delete();
381   -
382 158 } catch (Exception e) {
383 159 e.printStackTrace();
384 160 }
... ...