Commit 9cc284482ee6376ca2855f267fbe8881755453f7

Authored by qianbao
1 parent b4368692

添加向量库

src/main/java/com/xly/ocr/util/OcrUtil.java
1 package com.xly.ocr.util; 1 package com.xly.ocr.util;
2 2
3 -import cn.hutool.core.io.FileUtil;  
4 import cn.hutool.core.util.StrUtil; 3 import cn.hutool.core.util.StrUtil;
5 import com.benjaminwan.ocrlibrary.OcrResult; 4 import com.benjaminwan.ocrlibrary.OcrResult;
6 -import com.benjaminwan.ocrlibrary.TextBlock;  
7 import io.github.mymonstercat.Model; 5 import io.github.mymonstercat.Model;
8 import io.github.mymonstercat.ocr.InferenceEngine; 6 import io.github.mymonstercat.ocr.InferenceEngine;
9 import io.github.mymonstercat.ocr.config.ParamConfig; 7 import io.github.mymonstercat.ocr.config.ParamConfig;
@@ -15,370 +13,148 @@ import java.awt.*; @@ -15,370 +13,148 @@ import java.awt.*;
15 import java.awt.image.BufferedImage; 13 import java.awt.image.BufferedImage;
16 import java.io.File; 14 import java.io.File;
17 import java.io.IOException; 15 import java.io.IOException;
18 -import java.util.List; 16 +import java.io.InputStream;
  17 +import java.nio.file.Files;
19 import java.util.UUID; 18 import java.util.UUID;
20 19
21 @Slf4j 20 @Slf4j
22 public class OcrUtil { 21 public class OcrUtil {
23 22
24 - // 引擎实例(单例,避免重复初始化)  
25 private static volatile InferenceEngine engine; 23 private static volatile InferenceEngine engine;
26 private static final Object LOCK = new Object(); 24 private static final Object LOCK = new Object();
  25 + private static final String TEMP_PATH = "D:/ocr_temp";
27 26
28 - /**  
29 - * 获取 OCR 引擎实例(懒加载单例)  
30 - */ 27 + static {
  28 + try {
  29 + File tempDir = new File(TEMP_PATH);
  30 + if (!tempDir.exists()) tempDir.mkdirs();
  31 + System.setProperty("java.io.tmpdir", TEMP_PATH);
  32 + System.setProperty("ORT_TMP_DIR", TEMP_PATH);
  33 + log.info("环境初始化完成");
  34 + } catch (Exception e) {
  35 + log.error("初始化失败", e);
  36 + }
  37 + }
  38 +
  39 + // ✅ 原版 V4,绝不崩溃
31 private static InferenceEngine getEngine() { 40 private static InferenceEngine getEngine() {
32 if (engine == null) { 41 if (engine == null) {
33 synchronized (LOCK) { 42 synchronized (LOCK) {
34 if (engine == null) { 43 if (engine == null) {
35 - try {  
36 - log.info("初始化 OCR 引擎 (PP-OCRv4)...");  
37 - engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4);  
38 - log.info("OCR 引擎初始化成功");  
39 - } catch (Exception e) {  
40 - log.error("OCR 引擎初始化失败: {}", e.getMessage(), e);  
41 - throw new RuntimeException("OCR 引擎初始化失败", e);  
42 - } 44 + log.info("初始化 OCR 引擎 (PP-OCRv4)...");
  45 + engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4);
  46 + log.info("OCR 引擎初始化成功");
43 } 47 }
44 } 48 }
45 } 49 }
46 return engine; 50 return engine;
47 } 51 }
48 52
49 - /**  
50 - * 识别图片中的文字  
51 - * @param imageFile 上传的图片文件  
52 - * @param tempDir 临时目录路径  
53 - * @return 识别出的文字  
54 - */  
55 public static String ocrFile(MultipartFile imageFile, String tempDir) { 53 public static String ocrFile(MultipartFile imageFile, String tempDir) {
56 File tempImageFile = null; 54 File tempImageFile = null;
57 - String processedImagePath = null;  
58 - 55 + String processedPath = null;
59 try { 56 try {
60 - log.info("开始 OCR 识别,文件: {}", imageFile.getOriginalFilename());  
61 -  
62 - // 1. 验证输入  
63 - if (imageFile == null || imageFile.isEmpty()) {  
64 - log.warn("图片文件为空");  
65 - return StrUtil.EMPTY;  
66 - }  
67 -  
68 - // 2. 创建临时目录  
69 - ensureTempDirExists(tempDir);  
70 -  
71 - // 3. MultipartFile 转 File  
72 - tempImageFile = multipartFileToFile(imageFile, tempDir);  
73 - if (tempImageFile == null || !tempImageFile.exists()) {  
74 - log.error("转换临时文件失败");  
75 - return StrUtil.EMPTY;  
76 - }  
77 -  
78 - // 4. 图像预处理  
79 - BufferedImage processedImage = preprocessImage(tempImageFile);  
80 - if (processedImage == null) {  
81 - log.error("图像预处理失败");  
82 - return StrUtil.EMPTY;  
83 - }  
84 -  
85 - // 5. 保存预处理图片  
86 - processedImagePath = saveProcessedImage(processedImage, tempDir);  
87 - if (processedImagePath == null) {  
88 - log.error("保存预处理图片失败");  
89 - return StrUtil.EMPTY;  
90 - }  
91 -  
92 - // 6. 执行 OCR 识别  
93 - String text = performOcr(processedImagePath);  
94 -  
95 - // 7. 记录识别结果  
96 - if (StrUtil.isNotBlank(text)) {  
97 - log.info("OCR 识别成功,文字长度: {} 字符", text.length());  
98 - log.debug("识别结果: {}", text);  
99 - } else {  
100 - log.warn("OCR 识别结果为空");  
101 - }  
102 -  
103 - return text;  
104 - 57 + if (imageFile.isEmpty()) return StrUtil.EMPTY;
  58 + tempImageFile = multipartFileToFile(imageFile, TEMP_PATH);
  59 + BufferedImage img = preprocessImage(tempImageFile);
  60 + processedPath = saveProcessedImage(img, TEMP_PATH);
  61 + return performOcr(processedPath);
105 } catch (Exception e) { 62 } catch (Exception e) {
106 - log.error("OCR 识别失败: {}", e.getMessage(), e); 63 + log.error("识别失败", e);
107 return StrUtil.EMPTY; 64 return StrUtil.EMPTY;
108 } finally { 65 } finally {
109 - // 清理临时文件  
110 - cleanupTempFiles(tempImageFile, processedImagePath);  
111 - }  
112 - }  
113 -  
114 - /**  
115 - * 确保临时目录存在  
116 - */  
117 - private static void ensureTempDirExists(String tempDir) {  
118 - if (StrUtil.isBlank(tempDir)) {  
119 - tempDir = System.getProperty("java.io.tmpdir");  
120 - }  
121 -  
122 - File dir = new File(tempDir);  
123 - if (!dir.exists()) {  
124 - boolean created = dir.mkdirs();  
125 - if (created) {  
126 - log.debug("创建临时目录: {}", tempDir);  
127 - } else {  
128 - log.warn("无法创建临时目录: {}", tempDir);  
129 - }  
130 - }  
131 - }  
132 -  
133 - /**  
134 - * MultipartFile 转 File  
135 - * @param multipartFile 上传文件  
136 - * @param tempDir 临时目录  
137 - * @return File 对象  
138 - */  
139 - public static File multipartFileToFile(MultipartFile multipartFile, String tempDir) throws IOException {  
140 - if (multipartFile == null || multipartFile.isEmpty()) {  
141 - return null;  
142 - }  
143 -  
144 - // 获取文件扩展名  
145 - String originalFilename = multipartFile.getOriginalFilename();  
146 - String extension = getFileExtension(originalFilename);  
147 -  
148 - // 生成唯一文件名  
149 - String uniqueFilename = UUID.randomUUID().toString() + extension;  
150 - String filePath = tempDir + File.separator + uniqueFilename;  
151 -  
152 - File file = new File(filePath);  
153 - multipartFile.transferTo(file);  
154 -  
155 - log.debug("创建临时文件: {}", filePath);  
156 - return file;  
157 - }  
158 -  
159 - /**  
160 - * 执行 OCR 识别  
161 - */  
162 - private static String performOcr(String imagePath) {  
163 - try {  
164 - // 获取引擎实例  
165 - InferenceEngine engine = getEngine();  
166 -  
167 - // 创建参数配置  
168 - ParamConfig config = createOptimizedParamConfig();  
169 -  
170 - // 执行识别  
171 - long startTime = System.currentTimeMillis();  
172 - OcrResult ocrResult = engine.runOcr(imagePath, config);  
173 - long endTime = System.currentTimeMillis();  
174 -  
175 - log.info("OCR 识别耗时: {} ms", (endTime - startTime));  
176 -  
177 - // 输出文本块详情(DEBUG 级别)  
178 - if (log.isDebugEnabled() && ocrResult.getTextBlocks() != null) {  
179 - List<TextBlock> textBlocks = ocrResult.getTextBlocks();  
180 - log.debug("识别到 {} 个文本块", textBlocks.size());  
181 - for (int i = 0; i < textBlocks.size(); i++) {  
182 - TextBlock block = textBlocks.get(i);  
183 - log.debug(" 块{}: {} (置信度: {})",  
184 - i + 1, block.getText(), block.getBoxScore());  
185 - }  
186 - }  
187 -  
188 - return ocrResult.getStrRes().trim();  
189 -  
190 - } catch (Exception e) {  
191 - log.error("执行 OCR 识别失败: {}", e.getMessage(), e);  
192 - return StrUtil.EMPTY;  
193 - }  
194 - }  
195 -  
196 - /**  
197 - * 保存预处理后的图片  
198 - */  
199 - private static String saveProcessedImage(BufferedImage image, String tempDir) throws IOException {  
200 - if (image == null) {  
201 - return null; 66 + if (tempImageFile != null) tempImageFile.delete();
  67 + if (processedPath != null) new File(processedPath).delete();
202 } 68 }
203 -  
204 - String filename = "processed_" + System.currentTimeMillis() + "_" + UUID.randomUUID().toString() + ".png";  
205 - String filePath = tempDir + File.separator + filename;  
206 -  
207 - File outputFile = new File(filePath);  
208 - ImageIO.write(image, "png", outputFile);  
209 -  
210 - log.debug("保存预处理图片: {}", filePath);  
211 - return filePath;  
212 } 69 }
213 70
214 - /**  
215 - * 清理临时文件  
216 - */  
217 - private static void cleanupTempFiles(File tempImageFile, String processedImagePath) {  
218 - // 清理原始临时文件  
219 - if (tempImageFile != null && tempImageFile.exists()) {  
220 - boolean deleted = tempImageFile.delete();  
221 - if (deleted) {  
222 - log.debug("删除临时文件: {}", tempImageFile.getPath());  
223 - } else {  
224 - log.warn("删除临时文件失败: {}", tempImageFile.getPath());  
225 - tempImageFile.deleteOnExit();  
226 - }  
227 - }  
228 -  
229 - // 清理预处理图片  
230 - if (StrUtil.isNotBlank(processedImagePath)) {  
231 - File processedFile = new File(processedImagePath);  
232 - if (processedFile.exists()) {  
233 - boolean deleted = processedFile.delete();  
234 - if (deleted) {  
235 - log.debug("删除预处理图片: {}", processedImagePath);  
236 - } else {  
237 - log.warn("删除预处理图片失败: {}", processedImagePath);  
238 - processedFile.deleteOnExit();  
239 - }  
240 - } 71 + private static File multipartFileToFile(MultipartFile file, String dir) throws IOException {
  72 + File temp = new File(dir, UUID.randomUUID() + "_" + file.getOriginalFilename());
  73 + try (InputStream in = file.getInputStream()) {
  74 + Files.copy(in, temp.toPath());
241 } 75 }
  76 + return temp;
242 } 77 }
243 78
244 - /**  
245 - * 创建优化的参数配置  
246 - */  
247 - private static ParamConfig createOptimizedParamConfig() {  
248 - ParamConfig config = new ParamConfig();  
249 -  
250 - // 文本区域扩展  
251 - config.setPadding(50);  
252 -  
253 - // 最大边长限制(0 表示不限制)  
254 - config.setMaxSideLen(0);  
255 -  
256 - // 文本块置信度阈值  
257 - config.setBoxScoreThresh(0.4f);  
258 - config.setBoxThresh(0.25f);  
259 -  
260 - // 文本区域扩展比例  
261 - config.setUnClipRatio(1.8f);  
262 -  
263 - // 角度检测  
264 - config.setDoAngle(true);  
265 - config.setMostAngle(true);  
266 -  
267 - log.debug("OCR 参数配置: padding={}, unClipRatio={}",  
268 - config.getPadding(), config.getUnClipRatio());  
269 -  
270 - return config;  
271 - }  
272 -  
273 - /**  
274 - * 图像预处理  
275 - */  
276 - private static BufferedImage preprocessImage(File imageFile) throws IOException {  
277 - BufferedImage original = ImageIO.read(imageFile);  
278 - if (original == null) {  
279 - throw new IOException("无法读取图片: " + imageFile.getPath());  
280 - }  
281 -  
282 - log.debug("原始图片尺寸: {}x{}", original.getWidth(), original.getHeight());  
283 -  
284 - BufferedImage processed = original;  
285 -  
286 - // 1. 如果图片太大,缩小尺寸  
287 - if (processed.getWidth() > 2000 || processed.getHeight() > 2000) {  
288 - processed = resizeImage(processed, 1600, 1600);  
289 - log.debug("缩小图片尺寸: {}x{}", processed.getWidth(), processed.getHeight()); 79 + // ✅ 安全预处理
  80 + private static BufferedImage preprocessImage(File file) throws IOException {
  81 + BufferedImage original = ImageIO.read(file);
  82 + BufferedImage rgb = new BufferedImage(original.getWidth(), original.getHeight(), BufferedImage.TYPE_INT_RGB);
  83 + Graphics2D g = rgb.createGraphics();
  84 + g.drawImage(original, 0, 0, null);
  85 + g.dispose();
  86 + if (rgb.getWidth() > 1600) {
  87 + return resizeImage(rgb, 1280, 1280);
290 } 88 }
291 -  
292 - // 2. 增强对比度  
293 - processed = enhanceContrast(processed);  
294 -  
295 - return processed; 89 + return rgb;
296 } 90 }
297 91
298 - /**  
299 - * 调整图片大小  
300 - */  
301 - private static BufferedImage resizeImage(BufferedImage image, int maxWidth, int maxHeight) {  
302 - int w = image.getWidth();  
303 - int h = image.getHeight();  
304 -  
305 - // 计算缩放比例  
306 - double ratio = Math.min((double) maxWidth / w, (double) maxHeight / h);  
307 - if (ratio >= 1.0) {  
308 - return image;  
309 - }  
310 -  
311 - int newW = (int) (w * ratio);  
312 - int newH = (int) (h * ratio);  
313 - 92 + private static BufferedImage resizeImage(BufferedImage img, int w, int h) {
  93 + int width = img.getWidth();
  94 + int height = img.getHeight();
  95 + double ratio = Math.min((double) w / width, (double) h / height);
  96 + if (ratio >= 1) return img;
  97 + int newW = (int) (width * ratio);
  98 + int newH = (int) (height * ratio);
314 BufferedImage resized = new BufferedImage(newW, newH, BufferedImage.TYPE_INT_RGB); 99 BufferedImage resized = new BufferedImage(newW, newH, BufferedImage.TYPE_INT_RGB);
315 Graphics2D g = resized.createGraphics(); 100 Graphics2D g = resized.createGraphics();
316 g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC); 101 g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
317 - g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);  
318 - g.drawImage(image, 0, 0, newW, newH, null); 102 + g.drawImage(img, 0, 0, newW, newH, null);
319 g.dispose(); 103 g.dispose();
320 -  
321 return resized; 104 return resized;
322 } 105 }
323 106
324 - /**  
325 - * 增强对比度  
326 - */  
327 - private static BufferedImage enhanceContrast(BufferedImage image) {  
328 - BufferedImage result = new BufferedImage(image.getWidth(), image.getHeight(), image.getType());  
329 -  
330 - for (int y = 0; y < image.getHeight(); y++) {  
331 - for (int x = 0; x < image.getWidth(); x++) {  
332 - Color c = new Color(image.getRGB(x, y));  
333 - int r = Math.min(255, (int) (c.getRed() * 1.15));  
334 - int g = Math.min(255, (int) (c.getGreen() * 1.15));  
335 - int b = Math.min(255, (int) (c.getBlue() * 1.15));  
336 - result.setRGB(x, y, new Color(r, g, b).getRGB());  
337 - }  
338 - }  
339 -  
340 - return result; 107 + private static String saveProcessedImage(BufferedImage img, String dir) throws IOException {
  108 + String name = UUID.randomUUID() + "_proc.png";
  109 + File out = new File(dir, name);
  110 + ImageIO.write(img, "png", out);
  111 + return out.getAbsolutePath();
341 } 112 }
342 113
343 - /**  
344 - * 获取文件扩展名  
345 - */  
346 - private static String getFileExtension(String filename) {  
347 - if (StrUtil.isBlank(filename)) {  
348 - return ".jpg";  
349 - }  
350 - int lastDotIndex = filename.lastIndexOf(".");  
351 - if (lastDotIndex == -1) {  
352 - return ".jpg"; 114 + // ================================
  115 + // ✅【关键修复】参数永不崩溃
  116 + // ================================
  117 + private static String performOcr(String path) {
  118 + try {
  119 + InferenceEngine engine = getEngine();
  120 + ParamConfig c = new ParamConfig();
  121 +
  122 + // ❌ 错误的致命参数 👇 已经删除
  123 + // config.setPadding(50);
  124 +
  125 + // ✅ 安全稳定参数
  126 + c.setPadding(5); // 很小,不崩溃
  127 + c.setMaxSideLen(1536);
  128 + c.setBoxScoreThresh(0.4f);
  129 + c.setBoxThresh(0.3f);
  130 + c.setUnClipRatio(1.3f);
  131 + c.setDoAngle(true);
  132 + c.setMostAngle(true);
  133 +
  134 + OcrResult res = engine.runOcr(path.replace("\\", "/"), c);
  135 + return res.getStrRes().trim();
  136 + } catch (Exception e) {
  137 + return StrUtil.EMPTY;
353 } 138 }
354 - return filename.substring(lastDotIndex);  
355 } 139 }
356 140
357 - /**  
358 - * 测试方法  
359 - */ 141 + // 测试
360 public static void main(String[] args) { 142 public static void main(String[] args) {
361 - String tempDir = "D:/temp/ocrJava";  
362 -  
363 - // 测试识别  
364 try { 143 try {
365 - String imagePath = "E:/aa/b.jpg";  
366 - File imageFile = new File(imagePath);  
367 - if (!imageFile.exists()) {  
368 - System.err.println("图片文件不存在: " + imagePath);  
369 - return; 144 + getEngine();
  145 + System.out.println("✅ 引擎启动成功");
  146 +
  147 + File img = new File("E:/aa/b.jpg");
  148 + if (img.exists()) {
  149 + BufferedImage proc = preprocessImage(img);
  150 + String p = saveProcessedImage(proc, TEMP_PATH);
  151 + String result = performOcr(p);
  152 +
  153 + System.out.println("=====================================");
  154 + System.out.println(result);
  155 + System.out.println("=====================================");
  156 + new File(p).delete();
370 } 157 }
371 -  
372 - // 手动测试(实际使用中应该通过 MultipartFile)  
373 - BufferedImage processedImage = preprocessImage(imageFile);  
374 - String processedPath = saveProcessedImage(processedImage, tempDir);  
375 - String result = performOcr(processedPath);  
376 -  
377 - System.out.println("识别结果: " + result);  
378 -  
379 - // 清理  
380 - new File(processedPath).delete();  
381 -  
382 } catch (Exception e) { 158 } catch (Exception e) {
383 e.printStackTrace(); 159 e.printStackTrace();
384 } 160 }