Commit b436869234a3250081525b7be1f363b0adda3be4

Authored by qianbao
1 parent f2e11f8f

添加向量库

src/main/java/com/xly/ocr/service/OcrService.java
@@ -2,21 +2,10 @@ package com.xly.ocr.service; @@ -2,21 +2,10 @@ package com.xly.ocr.service;
2 2
3 import com.xly.ocr.util.OcrUtil; 3 import com.xly.ocr.util.OcrUtil;
4 import lombok.extern.slf4j.Slf4j; 4 import lombok.extern.slf4j.Slf4j;
5 -import net.sourceforge.tess4j.Tesseract;  
6 -import net.sourceforge.tess4j.TesseractException;  
7 -import org.slf4j.Logger;  
8 -import org.slf4j.LoggerFactory;  
9 import org.springframework.beans.factory.annotation.Value; 5 import org.springframework.beans.factory.annotation.Value;
10 import org.springframework.stereotype.Service; 6 import org.springframework.stereotype.Service;
11 import org.springframework.web.multipart.MultipartFile; 7 import org.springframework.web.multipart.MultipartFile;
12 8
13 -import javax.imageio.ImageIO;  
14 -import java.awt.*;  
15 -import java.awt.image.BufferedImage;  
16 -import java.io.File;  
17 -import java.io.IOException;  
18 -import java.nio.file.Files;  
19 -import java.nio.file.Path;  
20 import java.util.Arrays; 9 import java.util.Arrays;
21 import java.util.List; 10 import java.util.List;
22 11
@@ -24,473 +13,64 @@ import java.util.List; @@ -24,473 +13,64 @@ import java.util.List;
24 @Service("ocrService") 13 @Service("ocrService")
25 public class OcrService { 14 public class OcrService {
26 15
27 - private static final Logger logger = LoggerFactory.getLogger(OcrService.class);  
28 -  
29 @Value("${ocr.tmpPath}") 16 @Value("${ocr.tmpPath}")
30 private String tmpPath; 17 private String tmpPath;
31 18
32 -  
33 - private final Tesseract tesseract;  
34 -  
35 - // 配置参数  
36 - private static final List<String> ALLOWED_EXTENSIONS = Arrays.asList(".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".gif"); 19 + private static final List<String> ALLOWED_EXTENSIONS = Arrays.asList(
  20 + ".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".gif"
  21 + );
37 private static final long MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB 22 private static final long MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB
38 - private static final int BINARIZE_THRESHOLD = 127;  
39 - private static final int MIN_WIDTH = 800;  
40 - private static final int MIN_HEIGHT = 200;  
41 -  
42 - // 性能统计  
43 - private static class OcrStats {  
44 - long preprocessTime = 0;  
45 - long ocrTime = 0;  
46 - String imageSize = "";  
47 -  
48 - @Override  
49 - public String toString() {  
50 - return String.format("预处理耗时: %dms, OCR耗时: %dms, 图片尺寸: %s",  
51 - preprocessTime, ocrTime, imageSize);  
52 - }  
53 - }  
54 -  
55 - public OcrService(@Value("${tesseract.datapath}") String dataPath) {  
56 - this.tesseract = new Tesseract();  
57 -  
58 - // 基础配置  
59 - this.tesseract.setDatapath(dataPath);  
60 - this.tesseract.setLanguage("chi_sim+eng");  
61 -  
62 - // 优化识别参数  
63 - configureTesseract();  
64 -  
65 - logger.info("Tesseract 初始化完成,语言包路径: {}, 语言: chi_sim+eng", dataPath);  
66 - }  
67 -  
68 - /**  
69 - * 配置 Tesseract 参数  
70 - */  
71 - private void configureTesseract() {  
72 - // 页面分割模式:3 = 自动页面分割,但没有方向检测  
73 - this.tesseract.setPageSegMode(3);  
74 -  
75 - // OCR 引擎模式:3 = 默认,基于 LSTM 和传统引擎  
76 - this.tesseract.setOcrEngineMode(3);  
77 -  
78 - // 提高中文识别率  
79 - this.tesseract.setVariable("preserve_interword_spaces", "1");  
80 - this.tesseract.setVariable("textord_force_make_prop_words", "true");  
81 -  
82 - // 可选:设置字符白名单(根据需要启用)  
83 - // this.tesseract.setVariable("tessedit_char_whitelist",  
84 - // "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ,。!?;:\"‘’“”【】()《》");  
85 -  
86 - // 可选:设置黑名单(排除干扰字符)  
87 - // this.tesseract.setVariable("tessedit_char_blacklist", "|\\/`~@#$%^&*()_+={}[]");  
88 - }  
89 -  
90 - /**  
91 - * 图片预处理 - 优化的处理流程  
92 - */  
93 - private BufferedImage preprocessImage(BufferedImage originalImage) {  
94 - if (originalImage == null) {  
95 - return null;  
96 - }  
97 -  
98 - try {  
99 - long startTime = System.currentTimeMillis();  
100 -  
101 - // 1. 自动调整亮度和对比度  
102 - BufferedImage adjusted = autoAdjustBrightnessContrast(originalImage);  
103 -  
104 - // 2. 灰度化  
105 - BufferedImage grayImage = toGray(adjusted);  
106 -  
107 - // 3. 自适应二值化(比固定阈值更好)  
108 - BufferedImage binaryImage = adaptiveBinarize(grayImage);  
109 -  
110 - // 4. 降噪处理  
111 - BufferedImage denoisedImage = denoise(binaryImage);  
112 -  
113 - // 5. 放大图片(如果太小)  
114 - BufferedImage scaledImage = scaleImageIfNeeded(denoisedImage);  
115 -  
116 - // 6. 可选:边缘增强(提高清晰度)  
117 - BufferedImage enhancedImage = sharpen(scaledImage);  
118 -  
119 - long endTime = System.currentTimeMillis();  
120 - logger.debug("图片预处理耗时: {}ms", endTime - startTime);  
121 -  
122 - return enhancedImage;  
123 -  
124 - } catch (Exception e) {  
125 - logger.error("图片预处理失败: {}", e.getMessage(), e);  
126 - return originalImage;  
127 - }  
128 - }  
129 -  
130 - /**  
131 - * 自动调整亮度和对比度  
132 - */  
133 - private BufferedImage autoAdjustBrightnessContrast(BufferedImage image) {  
134 - BufferedImage result = new BufferedImage(  
135 - image.getWidth(), image.getHeight(), image.getType());  
136 -  
137 - // 计算亮度直方图  
138 - int[] histogram = new int[256];  
139 - for (int y = 0; y < image.getHeight(); y++) {  
140 - for (int x = 0; x < image.getWidth(); x++) {  
141 - int rgb = image.getRGB(x, y);  
142 - int gray = (int)((rgb >> 16 & 0xFF) * 0.299 +  
143 - (rgb >> 8 & 0xFF) * 0.587 +  
144 - (rgb & 0xFF) * 0.114);  
145 - histogram[gray]++;  
146 - }  
147 - }  
148 -  
149 - // 找到黑色和白色的阈值  
150 - int total = image.getWidth() * image.getHeight();  
151 - int blackThreshold = 0;  
152 - int whiteThreshold = 255;  
153 -  
154 - int sum = 0;  
155 - for (int i = 0; i < 256; i++) {  
156 - sum += histogram[i];  
157 - if (sum > total * 0.05) {  
158 - blackThreshold = i;  
159 - break;  
160 - }  
161 - }  
162 -  
163 - sum = 0;  
164 - for (int i = 255; i >= 0; i--) {  
165 - sum += histogram[i];  
166 - if (sum > total * 0.05) {  
167 - whiteThreshold = i;  
168 - break;  
169 - }  
170 - }  
171 -  
172 - // 应用对比度拉伸  
173 - for (int y = 0; y < image.getHeight(); y++) {  
174 - for (int x = 0; x < image.getWidth(); x++) {  
175 - int rgb = image.getRGB(x, y);  
176 - int r = (rgb >> 16) & 0xFF;  
177 - int g = (rgb >> 8) & 0xFF;  
178 - int b = rgb & 0xFF;  
179 -  
180 - // 拉伸到 0-255 范围  
181 - r = stretchValue(r, blackThreshold, whiteThreshold);  
182 - g = stretchValue(g, blackThreshold, whiteThreshold);  
183 - b = stretchValue(b, blackThreshold, whiteThreshold);  
184 -  
185 - result.setRGB(x, y, (r << 16) | (g << 8) | b);  
186 - }  
187 - }  
188 -  
189 - return result;  
190 - }  
191 -  
192 - private int stretchValue(int value, int black, int white) {  
193 - if (value <= black) return 0;  
194 - if (value >= white) return 255;  
195 - return (value - black) * 255 / (white - black);  
196 - }  
197 -  
198 - /**  
199 - * 灰度化  
200 - */  
201 - private BufferedImage toGray(BufferedImage image) {  
202 - BufferedImage result = new BufferedImage(  
203 - image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY);  
204 - Graphics g = result.getGraphics();  
205 - g.drawImage(image, 0, 0, null);  
206 - g.dispose();  
207 - return result;  
208 - }  
209 -  
210 - /**  
211 - * 自适应二值化 - 根据局部区域动态调整阈值  
212 - */  
213 - private BufferedImage adaptiveBinarize(BufferedImage image) {  
214 - BufferedImage result = new BufferedImage(  
215 - image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY);  
216 -  
217 - int blockSize = 15;  
218 - int constant = 5;  
219 -  
220 - for (int y = 0; y < image.getHeight(); y++) {  
221 - for (int x = 0; x < image.getWidth(); x++) {  
222 - // 计算局部区域的平均值  
223 - int sum = 0;  
224 - int count = 0;  
225 - for (int ky = -blockSize/2; ky <= blockSize/2; ky++) {  
226 - for (int kx = -blockSize/2; kx <= blockSize/2; kx++) {  
227 - int px = Math.min(Math.max(x + kx, 0), image.getWidth() - 1);  
228 - int py = Math.min(Math.max(y + ky, 0), image.getHeight() - 1);  
229 - sum += new Color(image.getRGB(px, py)).getRed();  
230 - count++;  
231 - }  
232 - }  
233 - int threshold = sum / count - constant;  
234 -  
235 - // 应用阈值  
236 - int gray = new Color(image.getRGB(x, y)).getRed();  
237 - int binary = gray > threshold ? 255 : 0;  
238 - result.setRGB(x, y, new Color(binary, binary, binary).getRGB());  
239 - }  
240 - }  
241 - return result;  
242 - }  
243 23
244 /** 24 /**
245 - * 降噪 - 优化的中值滤波 25 + * 从 MultipartFile 中提取文字
246 */ 26 */
247 - private BufferedImage denoise(BufferedImage image) {  
248 - BufferedImage result = new BufferedImage(  
249 - image.getWidth(), image.getHeight(), image.getType());  
250 -  
251 - for (int y = 1; y < image.getHeight() - 1; y++) {  
252 - for (int x = 1; x < image.getWidth() - 1; x++) {  
253 - int[] neighbors = new int[9];  
254 - int index = 0;  
255 - for (int ky = -1; ky <= 1; ky++) {  
256 - for (int kx = -1; kx <= 1; kx++) {  
257 - neighbors[index++] = new Color(image.getRGB(x + kx, y + ky)).getRed();  
258 - }  
259 - }  
260 - Arrays.sort(neighbors);  
261 - int median = neighbors[4];  
262 - result.setRGB(x, y, new Color(median, median, median).getRGB());  
263 - }  
264 - }  
265 -  
266 - // 处理边缘  
267 - for (int x = 0; x < image.getWidth(); x++) {  
268 - result.setRGB(x, 0, image.getRGB(x, 0));  
269 - result.setRGB(x, image.getHeight() - 1, image.getRGB(x, image.getHeight() - 1));  
270 - }  
271 - for (int y = 0; y < image.getHeight(); y++) {  
272 - result.setRGB(0, y, image.getRGB(0, y));  
273 - result.setRGB(image.getWidth() - 1, y, image.getRGB(image.getWidth() - 1, y));  
274 - }  
275 -  
276 - return result;  
277 - }  
278 -  
279 - /**  
280 - * 锐化处理 - 增强文字边缘  
281 - */  
282 - private BufferedImage sharpen(BufferedImage image) {  
283 - BufferedImage result = new BufferedImage(  
284 - image.getWidth(), image.getHeight(), image.getType());  
285 -  
286 - // 拉普拉斯锐化核  
287 - float[] sharpenKernel = {  
288 - 0, -1, 0,  
289 - -1, 5, -1,  
290 - 0, -1, 0  
291 - };  
292 -  
293 - for (int y = 1; y < image.getHeight() - 1; y++) {  
294 - for (int x = 1; x < image.getWidth() - 1; x++) {  
295 - int sum = 0;  
296 - int index = 0;  
297 - for (int ky = -1; ky <= 1; ky++) {  
298 - for (int kx = -1; kx <= 1; kx++) {  
299 - int gray = new Color(image.getRGB(x + kx, y + ky)).getRed();  
300 - sum += gray * sharpenKernel[index++];  
301 - }  
302 - }  
303 - sum = Math.min(255, Math.max(0, sum));  
304 - result.setRGB(x, y, new Color(sum, sum, sum).getRGB());  
305 - }  
306 - }  
307 -  
308 - return result;  
309 - }  
310 -  
311 - /**  
312 - * 放大图片(如果图片太小)  
313 - */  
314 - private BufferedImage scaleImageIfNeeded(BufferedImage image) {  
315 - int width = image.getWidth();  
316 - int height = image.getHeight();  
317 -  
318 - if (width >= MIN_WIDTH && height >= MIN_HEIGHT) {  
319 - return image;  
320 - }  
321 -  
322 - double scaleX = (double) MIN_WIDTH / width;  
323 - double scaleY = (double) MIN_HEIGHT / height;  
324 - double scale = Math.max(scaleX, scaleY);  
325 -  
326 - int newWidth = (int) (width * scale);  
327 - int newHeight = (int) (height * scale);  
328 -  
329 - // 使用更好的插值算法  
330 - BufferedImage result = new BufferedImage(newWidth, newHeight, image.getType());  
331 - Graphics2D g2d = result.createGraphics();  
332 - g2d.setRenderingHint(RenderingHints.KEY_INTERPOLATION,  
333 - RenderingHints.VALUE_INTERPOLATION_BICUBIC);  
334 - g2d.setRenderingHint(RenderingHints.KEY_RENDERING,  
335 - RenderingHints.VALUE_RENDER_QUALITY);  
336 - g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING,  
337 - RenderingHints.VALUE_ANTIALIAS_ON);  
338 - g2d.drawImage(image, 0, 0, newWidth, newHeight, null);  
339 - g2d.dispose();  
340 -  
341 - logger.debug("图片已放大: {}x{} -> {}x{}", width, height, newWidth, newHeight);  
342 - return result;  
343 - }  
344 -  
345 - /**  
346 - * 识别图片中的文字(增强版)  
347 - */  
348 - public String extractText(File imageFile) {  
349 - if (imageFile == null || !imageFile.exists()) {  
350 - logger.error("图片文件不存在或为空");  
351 - return "图片文件不存在"; 27 + public String extractTextFromMultipartFile(MultipartFile file) {
  28 + // 1. 验证文件
  29 + String validationError = validateFile(file);
  30 + if (validationError != null) {
  31 + return validationError;
352 } 32 }
353 33
354 - OcrStats stats = new OcrStats();  
355 - 34 + // 2. 调用 OCR 工具类识别
356 try { 35 try {
357 - logger.info("开始识别图片: {}, 大小: {} bytes",  
358 - imageFile.getAbsolutePath(), imageFile.length()); 36 + String result = OcrUtil.ocrFile(file, tmpPath);
359 37
360 - // 读取原始图片  
361 - long readStart = System.currentTimeMillis();  
362 - BufferedImage originalImage = ImageIO.read(imageFile);  
363 - if (originalImage == null) {  
364 - return "无法读取图片文件,请确保图片格式正确";  
365 - }  
366 - stats.imageSize = originalImage.getWidth() + "x" + originalImage.getHeight();  
367 -  
368 - // 图片预处理  
369 - long preprocessStart = System.currentTimeMillis();  
370 - BufferedImage processedImage = preprocessImage(originalImage);  
371 - stats.preprocessTime = System.currentTimeMillis() - preprocessStart;  
372 -  
373 - // 可选:保存预处理图片用于调试(生产环境可注释)  
374 - if (logger.isDebugEnabled()) {  
375 - saveDebugImage(processedImage, imageFile);  
376 - }  
377 -  
378 - // 执行 OCR  
379 - long ocrStart = System.currentTimeMillis();  
380 - String result = tesseract.doOCR(processedImage);  
381 - stats.ocrTime = System.currentTimeMillis() - ocrStart;  
382 -  
383 - logger.info("识别完成 - {}", stats);  
384 -  
385 - // 清理识别结果  
386 - result = cleanResult(result);  
387 -  
388 - if (result.isEmpty()) {  
389 - logger.warn("识别结果为空,可能需要调整预处理参数"); 38 + if (result == null || result.trim().isEmpty()) {
  39 + log.warn("未识别到文字内容,文件: {}", file.getOriginalFilename());
  40 + return "未识别到文字内容";
390 } 41 }
391 42
  43 + log.info("识别成功,文件: {}, 文字长度: {}",
  44 + file.getOriginalFilename(), result.length());
392 return result; 45 return result;
393 46
394 - } catch (TesseractException e) {  
395 - logger.error("OCR识别失败: {}", e.getMessage(), e); 47 + } catch (Exception e) {
  48 + log.error("OCR识别异常: {}", e.getMessage(), e);
396 return "OCR识别失败: " + e.getMessage(); 49 return "OCR识别失败: " + e.getMessage();
397 - } catch (IOException e) {  
398 - logger.error("读取图片失败: {}", e.getMessage(), e);  
399 - return "读取图片失败: " + e.getMessage();  
400 - }  
401 - }  
402 -  
403 - /**  
404 - * 保存调试图片(仅用于调试)  
405 - */  
406 - private void saveDebugImage(BufferedImage image, File originalFile) {  
407 - try {  
408 - String debugPath = originalFile.getParent() + "/debug_" + originalFile.getName();  
409 - File debugFile = new File(debugPath);  
410 - ImageIO.write(image, "png", debugFile);  
411 - logger.debug("预处理图片已保存: {}", debugPath);  
412 - } catch (IOException e) {  
413 - logger.debug("保存调试图片失败: {}", e.getMessage());  
414 - }  
415 - }  
416 -  
417 - /**  
418 - * 清理识别结果  
419 - */  
420 - private String cleanResult(String result) {  
421 - if (result == null || result.isEmpty()) {  
422 - return "";  
423 - }  
424 -  
425 - // 去除首尾空白  
426 - result = result.trim();  
427 -  
428 - // 规范化换行符  
429 - result = result.replaceAll("\\r\\n", "\n")  
430 - .replaceAll("\\r", "\n");  
431 -  
432 - // 合并多个空行  
433 - result = result.replaceAll("\n{3,}", "\n\n");  
434 -  
435 - // 去除行首行尾空格  
436 - String[] lines = result.split("\n");  
437 - StringBuilder cleaned = new StringBuilder();  
438 - for (String line : lines) {  
439 - cleaned.append(line.trim()).append("\n");  
440 } 50 }
441 -  
442 - return cleaned.toString().trim();  
443 } 51 }
444 52
445 /** 53 /**
446 - * 封装方法,接收上传的 MultipartFile 54 + * 验证文件
447 */ 55 */
448 - public String extractTextFromMultipartFile(MultipartFile file) { 56 + private String validateFile(MultipartFile file) {
449 if (file == null || file.isEmpty()) { 57 if (file == null || file.isEmpty()) {
450 - logger.warn("上传的文件为空"); 58 + log.warn("上传的文件为空");
451 return "上传的文件为空"; 59 return "上传的文件为空";
452 } 60 }
453 61
454 - // 验证文件大小  
455 if (file.getSize() > MAX_FILE_SIZE) { 62 if (file.getSize() > MAX_FILE_SIZE) {
456 - logger.warn("文件过大: {} bytes, 超过限制: {} bytes",  
457 - file.getSize(), MAX_FILE_SIZE); 63 + log.warn("文件过大: {} bytes", file.getSize());
458 return String.format("文件过大,最大支持 %dMB", MAX_FILE_SIZE / 1024 / 1024); 64 return String.format("文件过大,最大支持 %dMB", MAX_FILE_SIZE / 1024 / 1024);
459 } 65 }
460 66
461 - // 验证文件格式  
462 String originalFilename = file.getOriginalFilename(); 67 String originalFilename = file.getOriginalFilename();
463 if (originalFilename != null && !isAllowedImage(originalFilename)) { 68 if (originalFilename != null && !isAllowedImage(originalFilename)) {
464 - logger.warn("不支持的文件格式: {}", originalFilename); 69 + log.warn("不支持的文件格式: {}", originalFilename);
465 return "不支持的文件格式,仅支持: " + String.join(", ", ALLOWED_EXTENSIONS); 70 return "不支持的文件格式,仅支持: " + String.join(", ", ALLOWED_EXTENSIONS);
466 } 71 }
467 - String sText = OcrUtil.ocrFile(file,tmpPath);  
468 - return sText;  
469 - }  
470 -  
471 - /**  
472 - * 清理临时文件  
473 - */  
474 - private void cleanupTempFile(Path tempFile) {  
475 - if (tempFile != null) {  
476 - try {  
477 - Files.deleteIfExists(tempFile);  
478 - logger.debug("临时文件已删除: {}", tempFile);  
479 - } catch (IOException e) {  
480 - logger.warn("删除临时文件失败: {}", tempFile, e);  
481 - // 注册JVM退出时删除  
482 - tempFile.toFile().deleteOnExit();  
483 - }  
484 - }  
485 - }  
486 72
487 - /**  
488 - * 批量识别(用于多张图片)  
489 - */  
490 - public List<String> batchExtractText(List<MultipartFile> files) {  
491 - return files.stream()  
492 - .map(this::extractTextFromMultipartFile)  
493 - .collect(java.util.stream.Collectors.toList()); 73 + return null;
494 } 74 }
495 75
496 /** 76 /**
@@ -504,14 +84,4 @@ public class OcrService { @@ -504,14 +84,4 @@ public class OcrService {
504 return ALLOWED_EXTENSIONS.stream() 84 return ALLOWED_EXTENSIONS.stream()
505 .anyMatch(lowerFilename::endsWith); 85 .anyMatch(lowerFilename::endsWith);
506 } 86 }
507 -  
508 - /**  
509 - * 获取文件扩展名  
510 - */  
511 - private String getFileExtension(String filename) {  
512 - if (filename == null || !filename.contains(".")) {  
513 - return ".jpg";  
514 - }  
515 - return filename.substring(filename.lastIndexOf("."));  
516 - }  
517 } 87 }
518 \ No newline at end of file 88 \ No newline at end of file
src/main/java/com/xly/ocr/util/OcrUtil.java
@@ -16,107 +16,262 @@ import java.awt.image.BufferedImage; @@ -16,107 +16,262 @@ import java.awt.image.BufferedImage;
16 import java.io.File; 16 import java.io.File;
17 import java.io.IOException; 17 import java.io.IOException;
18 import java.util.List; 18 import java.util.List;
  19 +import java.util.UUID;
19 20
20 @Slf4j 21 @Slf4j
21 public class OcrUtil { 22 public class OcrUtil {
22 23
23 - static { 24 + // 引擎实例(单例,避免重复初始化)
  25 + private static volatile InferenceEngine engine;
  26 + private static final Object LOCK = new Object();
  27 +
  28 + /**
  29 + * 获取 OCR 引擎实例(懒加载单例)
  30 + */
  31 + private static InferenceEngine getEngine() {
  32 + if (engine == null) {
  33 + synchronized (LOCK) {
  34 + if (engine == null) {
  35 + try {
  36 + log.info("初始化 OCR 引擎 (PP-OCRv4)...");
  37 + engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4);
  38 + log.info("OCR 引擎初始化成功");
  39 + } catch (Exception e) {
  40 + log.error("OCR 引擎初始化失败: {}", e.getMessage(), e);
  41 + throw new RuntimeException("OCR 引擎初始化失败", e);
  42 + }
  43 + }
  44 + }
  45 + }
  46 + return engine;
  47 + }
  48 +
  49 + /**
  50 + * 识别图片中的文字
  51 + * @param imageFile 上传的图片文件
  52 + * @param tempDir 临时目录路径
  53 + * @return 识别出的文字
  54 + */
  55 + public static String ocrFile(MultipartFile imageFile, String tempDir) {
  56 + File tempImageFile = null;
  57 + String processedImagePath = null;
  58 +
24 try { 59 try {
25 - String customTempDir = "D:/temp/ocrJava";  
26 - File tempDir = new File(customTempDir);  
27 - if (!tempDir.exists()) {  
28 - tempDir.mkdirs(); 60 + log.info("开始 OCR 识别,文件: {}", imageFile.getOriginalFilename());
  61 +
  62 + // 1. 验证输入
  63 + if (imageFile == null || imageFile.isEmpty()) {
  64 + log.warn("图片文件为空");
  65 + return StrUtil.EMPTY;
  66 + }
  67 +
  68 + // 2. 创建临时目录
  69 + ensureTempDirExists(tempDir);
  70 +
  71 + // 3. MultipartFile 转 File
  72 + tempImageFile = multipartFileToFile(imageFile, tempDir);
  73 + if (tempImageFile == null || !tempImageFile.exists()) {
  74 + log.error("转换临时文件失败");
  75 + return StrUtil.EMPTY;
  76 + }
  77 +
  78 + // 4. 图像预处理
  79 + BufferedImage processedImage = preprocessImage(tempImageFile);
  80 + if (processedImage == null) {
  81 + log.error("图像预处理失败");
  82 + return StrUtil.EMPTY;
  83 + }
  84 +
  85 + // 5. 保存预处理图片
  86 + processedImagePath = saveProcessedImage(processedImage, tempDir);
  87 + if (processedImagePath == null) {
  88 + log.error("保存预处理图片失败");
  89 + return StrUtil.EMPTY;
  90 + }
  91 +
  92 + // 6. 执行 OCR 识别
  93 + String text = performOcr(processedImagePath);
  94 +
  95 + // 7. 记录识别结果
  96 + if (StrUtil.isNotBlank(text)) {
  97 + log.info("OCR 识别成功,文字长度: {} 字符", text.length());
  98 + log.debug("识别结果: {}", text);
  99 + } else {
  100 + log.warn("OCR 识别结果为空");
29 } 101 }
30 - System.setProperty("java.io.tmpdir", customTempDir);  
31 - System.setProperty("TMP", customTempDir);  
32 - System.setProperty("TEMP", customTempDir);  
33 102
34 - System.out.println("==================================");  
35 - System.out.println("临时目录: " + System.getProperty("java.io.tmpdir"));  
36 - System.out.println("=================================="); 103 + return text;
37 104
38 } catch (Exception e) { 105 } catch (Exception e) {
39 - System.err.println("设置临时目录失败: " + e.getMessage()); 106 + log.error("OCR 识别失败: {}", e.getMessage(), e);
  107 + return StrUtil.EMPTY;
  108 + } finally {
  109 + // 清理临时文件
  110 + cleanupTempFiles(tempImageFile, processedImagePath);
40 } 111 }
41 } 112 }
42 113
43 - public static String ocrFile(MultipartFile imageFile, String sTmpPath){  
44 - String processedImagePath = StrUtil.EMPTY; 114 + /**
  115 + * 确保临时目录存在
  116 + */
  117 + private static void ensureTempDirExists(String tempDir) {
  118 + if (StrUtil.isBlank(tempDir)) {
  119 + tempDir = System.getProperty("java.io.tmpdir");
  120 + }
  121 +
  122 + File dir = new File(tempDir);
  123 + if (!dir.exists()) {
  124 + boolean created = dir.mkdirs();
  125 + if (created) {
  126 + log.debug("创建临时目录: {}", tempDir);
  127 + } else {
  128 + log.warn("无法创建临时目录: {}", tempDir);
  129 + }
  130 + }
  131 + }
  132 +
  133 + /**
  134 + * MultipartFile 转 File
  135 + * @param multipartFile 上传文件
  136 + * @param tempDir 临时目录
  137 + * @return File 对象
  138 + */
  139 + public static File multipartFileToFile(MultipartFile multipartFile, String tempDir) throws IOException {
  140 + if (multipartFile == null || multipartFile.isEmpty()) {
  141 + return null;
  142 + }
  143 +
  144 + // 获取文件扩展名
  145 + String originalFilename = multipartFile.getOriginalFilename();
  146 + String extension = getFileExtension(originalFilename);
  147 +
  148 + // 生成唯一文件名
  149 + String uniqueFilename = UUID.randomUUID().toString() + extension;
  150 + String filePath = tempDir + File.separator + uniqueFilename;
  151 +
  152 + File file = new File(filePath);
  153 + multipartFile.transferTo(file);
  154 +
  155 + log.debug("创建临时文件: {}", filePath);
  156 + return file;
  157 + }
  158 +
  159 + /**
  160 + * 执行 OCR 识别
  161 + */
  162 + private static String performOcr(String imagePath) {
45 try { 163 try {
46 - log.info("OCR 程序开始执行...");  
47 - // 1. 初始化引擎(使用 v4 模型)  
48 - log.info("正在初始化 OCR 引擎 (PP-OCRv4)...");  
49 - InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4);  
50 - // 2. 创建优化的参数配置 164 + // 获取引擎实例
  165 + InferenceEngine engine = getEngine();
  166 +
  167 + // 创建参数配置
51 ParamConfig config = createOptimizedParamConfig(); 168 ParamConfig config = createOptimizedParamConfig();
52 - // 4. 图像预处理(直接处理原图,不保存临时文件)  
53 - System.out.println("正在进行图像预处理...");  
54 - File file = multipartFileToFile(imageFile);  
55 - BufferedImage processedImage = preprocessImage(file);  
56 - // 5. 保存预处理后的图片到临时目录  
57 - if(!FileUtil.exist(sTmpPath)){  
58 - FileUtil.mkdir(sTmpPath);  
59 - }  
60 - processedImagePath = sTmpPath+"/processed_" + System.currentTimeMillis() + ".png";  
61 - ImageIO.write(processedImage, "png", new File(processedImagePath));  
62 - log.info("预处理图片已保存: " + processedImagePath);  
63 - // 6. 执行识别  
64 - log.info("开始识别图片..."); 169 +
  170 + // 执行识别
65 long startTime = System.currentTimeMillis(); 171 long startTime = System.currentTimeMillis();
66 - OcrResult ocrResult = engine.runOcr(processedImagePath, config); 172 + OcrResult ocrResult = engine.runOcr(imagePath, config);
67 long endTime = System.currentTimeMillis(); 173 long endTime = System.currentTimeMillis();
68 - // 7. 输出结果  
69 - String text = ocrResult.getStrRes().trim();  
70 - log.info("\n==================================");  
71 - log.info("识别结果:");  
72 - log.info(text);  
73 - log.info("==================================");  
74 - log.info("识别耗时: " + (endTime - startTime) + " ms");  
75 - // 8. 输出每个文本块  
76 -// if (ocrResult.getTextBlocks() != null && !ocrResult.getTextBlocks().isEmpty()) {  
77 -// System.out.println("\n文本块详情(共" + ocrResult.getTextBlocks().size() + "块):");  
78 -// List<TextBlock> textBlocks = ocrResult.getTextBlocks();  
79 -// for (int i = 0; i < textBlocks.size(); i++) {  
80 -// TextBlock block = textBlocks.get(i);  
81 -// System.out.printf(" 块%d: %s (置信度: %.2f)%n",  
82 -// i + 1,  
83 -// block.getText(),  
84 -// block.getBoxScore()  
85 -// );  
86 -// }  
87 -// }  
88 - return text; 174 +
  175 + log.info("OCR 识别耗时: {} ms", (endTime - startTime));
  176 +
  177 + // 输出文本块详情(DEBUG 级别)
  178 + if (log.isDebugEnabled() && ocrResult.getTextBlocks() != null) {
  179 + List<TextBlock> textBlocks = ocrResult.getTextBlocks();
  180 + log.debug("识别到 {} 个文本块", textBlocks.size());
  181 + for (int i = 0; i < textBlocks.size(); i++) {
  182 + TextBlock block = textBlocks.get(i);
  183 + log.debug(" 块{}: {} (置信度: {})",
  184 + i + 1, block.getText(), block.getBoxScore());
  185 + }
  186 + }
  187 +
  188 + return ocrResult.getStrRes().trim();
89 189
90 } catch (Exception e) { 190 } catch (Exception e) {
91 - System.err.println("OCR 识别失败: " + e.getMessage());  
92 - e.printStackTrace();  
93 - }finally {  
94 - // 9. 清理临时文件  
95 - FileUtil.del(processedImagePath); 191 + log.error("执行 OCR 识别失败: {}", e.getMessage(), e);
  192 + return StrUtil.EMPTY;
  193 + }
  194 + }
  195 +
  196 + /**
  197 + * 保存预处理后的图片
  198 + */
  199 + private static String saveProcessedImage(BufferedImage image, String tempDir) throws IOException {
  200 + if (image == null) {
  201 + return null;
96 } 202 }
97 - return StrUtil.EMPTY; 203 +
  204 + String filename = "processed_" + System.currentTimeMillis() + "_" + UUID.randomUUID().toString() + ".png";
  205 + String filePath = tempDir + File.separator + filename;
  206 +
  207 + File outputFile = new File(filePath);
  208 + ImageIO.write(image, "png", outputFile);
  209 +
  210 + log.debug("保存预处理图片: {}", filePath);
  211 + return filePath;
98 } 212 }
99 213
  214 + /**
  215 + * 清理临时文件
  216 + */
  217 + private static void cleanupTempFiles(File tempImageFile, String processedImagePath) {
  218 + // 清理原始临时文件
  219 + if (tempImageFile != null && tempImageFile.exists()) {
  220 + boolean deleted = tempImageFile.delete();
  221 + if (deleted) {
  222 + log.debug("删除临时文件: {}", tempImageFile.getPath());
  223 + } else {
  224 + log.warn("删除临时文件失败: {}", tempImageFile.getPath());
  225 + tempImageFile.deleteOnExit();
  226 + }
  227 + }
  228 +
  229 + // 清理预处理图片
  230 + if (StrUtil.isNotBlank(processedImagePath)) {
  231 + File processedFile = new File(processedImagePath);
  232 + if (processedFile.exists()) {
  233 + boolean deleted = processedFile.delete();
  234 + if (deleted) {
  235 + log.debug("删除预处理图片: {}", processedImagePath);
  236 + } else {
  237 + log.warn("删除预处理图片失败: {}", processedImagePath);
  238 + processedFile.deleteOnExit();
  239 + }
  240 + }
  241 + }
  242 + }
100 243
101 /** 244 /**
102 - * 优化的参数配置 245 + * 创建优化的参数配置
103 */ 246 */
104 private static ParamConfig createOptimizedParamConfig() { 247 private static ParamConfig createOptimizedParamConfig() {
105 ParamConfig config = new ParamConfig(); 248 ParamConfig config = new ParamConfig();
106 249
  250 + // 文本区域扩展
107 config.setPadding(50); 251 config.setPadding(50);
  252 +
  253 + // 最大边长限制(0 表示不限制)
108 config.setMaxSideLen(0); 254 config.setMaxSideLen(0);
  255 +
  256 + // 文本块置信度阈值
109 config.setBoxScoreThresh(0.4f); 257 config.setBoxScoreThresh(0.4f);
110 config.setBoxThresh(0.25f); 258 config.setBoxThresh(0.25f);
  259 +
  260 + // 文本区域扩展比例
111 config.setUnClipRatio(1.8f); 261 config.setUnClipRatio(1.8f);
  262 +
  263 + // 角度检测
112 config.setDoAngle(true); 264 config.setDoAngle(true);
113 config.setMostAngle(true); 265 config.setMostAngle(true);
114 266
  267 + log.debug("OCR 参数配置: padding={}, unClipRatio={}",
  268 + config.getPadding(), config.getUnClipRatio());
  269 +
115 return config; 270 return config;
116 } 271 }
117 272
118 /** 273 /**
119 - * 图像预处理 - 直接返回处理后的 BufferedImage 274 + * 图像预处理
120 */ 275 */
121 private static BufferedImage preprocessImage(File imageFile) throws IOException { 276 private static BufferedImage preprocessImage(File imageFile) throws IOException {
122 BufferedImage original = ImageIO.read(imageFile); 277 BufferedImage original = ImageIO.read(imageFile);
@@ -124,36 +279,21 @@ public class OcrUtil { @@ -124,36 +279,21 @@ public class OcrUtil {
124 throw new IOException("无法读取图片: " + imageFile.getPath()); 279 throw new IOException("无法读取图片: " + imageFile.getPath());
125 } 280 }
126 281
127 - System.out.println("原始尺寸: " + original.getWidth() + "x" + original.getHeight()); 282 + log.debug("原始图片尺寸: {}x{}", original.getWidth(), original.getHeight());
128 283
129 BufferedImage processed = original; 284 BufferedImage processed = original;
130 285
131 // 1. 如果图片太大,缩小尺寸 286 // 1. 如果图片太大,缩小尺寸
132 if (processed.getWidth() > 2000 || processed.getHeight() > 2000) { 287 if (processed.getWidth() > 2000 || processed.getHeight() > 2000) {
133 processed = resizeImage(processed, 1600, 1600); 288 processed = resizeImage(processed, 1600, 1600);
  289 + log.debug("缩小图片尺寸: {}x{}", processed.getWidth(), processed.getHeight());
134 } 290 }
135 291
136 // 2. 增强对比度 292 // 2. 增强对比度
137 processed = enhanceContrast(processed); 293 processed = enhanceContrast(processed);
138 294
139 - System.out.println("处理后尺寸: " + processed.getWidth() + "x" + processed.getHeight());  
140 -  
141 return processed; 295 return processed;
142 } 296 }
143 - /***  
144 - * @Author 钱豹  
145 - * @Date 11:01 2026/4/1  
146 - * @Param [multipartFile]  
147 - * @return java.io.File  
148 - * @Description 图片对象转换  
149 - **/  
150 - public static File multipartFileToFile(MultipartFile multipartFile) throws IOException {  
151 - // 创建临时文件  
152 - File file = File.createTempFile("temp", null);  
153 - // 将 MultipartFile 的内容传输到 File  
154 - multipartFile.transferTo(file);  
155 - return file;  
156 - }  
157 297
158 /** 298 /**
159 * 调整图片大小 299 * 调整图片大小
@@ -161,8 +301,12 @@ public class OcrUtil { @@ -161,8 +301,12 @@ public class OcrUtil {
161 private static BufferedImage resizeImage(BufferedImage image, int maxWidth, int maxHeight) { 301 private static BufferedImage resizeImage(BufferedImage image, int maxWidth, int maxHeight) {
162 int w = image.getWidth(); 302 int w = image.getWidth();
163 int h = image.getHeight(); 303 int h = image.getHeight();
  304 +
  305 + // 计算缩放比例
164 double ratio = Math.min((double) maxWidth / w, (double) maxHeight / h); 306 double ratio = Math.min((double) maxWidth / w, (double) maxHeight / h);
165 - if (ratio >= 1) return image; 307 + if (ratio >= 1.0) {
  308 + return image;
  309 + }
166 310
167 int newW = (int) (w * ratio); 311 int newW = (int) (w * ratio);
168 int newH = (int) (h * ratio); 312 int newH = (int) (h * ratio);
@@ -170,8 +314,10 @@ public class OcrUtil { @@ -170,8 +314,10 @@ public class OcrUtil {
170 BufferedImage resized = new BufferedImage(newW, newH, BufferedImage.TYPE_INT_RGB); 314 BufferedImage resized = new BufferedImage(newW, newH, BufferedImage.TYPE_INT_RGB);
171 Graphics2D g = resized.createGraphics(); 315 Graphics2D g = resized.createGraphics();
172 g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC); 316 g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
  317 + g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
173 g.drawImage(image, 0, 0, newW, newH, null); 318 g.drawImage(image, 0, 0, newW, newH, null);
174 g.dispose(); 319 g.dispose();
  320 +
175 return resized; 321 return resized;
176 } 322 }
177 323
@@ -180,6 +326,7 @@ public class OcrUtil { @@ -180,6 +326,7 @@ public class OcrUtil {
180 */ 326 */
181 private static BufferedImage enhanceContrast(BufferedImage image) { 327 private static BufferedImage enhanceContrast(BufferedImage image) {
182 BufferedImage result = new BufferedImage(image.getWidth(), image.getHeight(), image.getType()); 328 BufferedImage result = new BufferedImage(image.getWidth(), image.getHeight(), image.getType());
  329 +
183 for (int y = 0; y < image.getHeight(); y++) { 330 for (int y = 0; y < image.getHeight(); y++) {
184 for (int x = 0; x < image.getWidth(); x++) { 331 for (int x = 0; x < image.getWidth(); x++) {
185 Color c = new Color(image.getRGB(x, y)); 332 Color c = new Color(image.getRGB(x, y));
@@ -189,21 +336,32 @@ public class OcrUtil { @@ -189,21 +336,32 @@ public class OcrUtil {
189 result.setRGB(x, y, new Color(r, g, b).getRGB()); 336 result.setRGB(x, y, new Color(r, g, b).getRGB());
190 } 337 }
191 } 338 }
  339 +
192 return result; 340 return result;
193 } 341 }
194 342
195 - public static void main(String[] args) {  
196 - try {  
197 - System.out.println("OCR 程序开始执行...");  
198 -  
199 - // 1. 初始化引擎(使用 v4 模型)  
200 - System.out.println("正在初始化 OCR 引擎 (PP-OCRv4)...");  
201 - InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4); 343 + /**
  344 + * 获取文件扩展名
  345 + */
  346 + private static String getFileExtension(String filename) {
  347 + if (StrUtil.isBlank(filename)) {
  348 + return ".jpg";
  349 + }
  350 + int lastDotIndex = filename.lastIndexOf(".");
  351 + if (lastDotIndex == -1) {
  352 + return ".jpg";
  353 + }
  354 + return filename.substring(lastDotIndex);
  355 + }
202 356
203 - // 2. 创建优化的参数配置  
204 - ParamConfig config = createOptimizedParamConfig(); 357 + /**
  358 + * 测试方法
  359 + */
  360 + public static void main(String[] args) {
  361 + String tempDir = "D:/temp/ocrJava";
205 362
206 - // 3. 图片路径 363 + // 测试识别
  364 + try {
207 String imagePath = "E:/aa/b.jpg"; 365 String imagePath = "E:/aa/b.jpg";
208 File imageFile = new File(imagePath); 366 File imageFile = new File(imagePath);
209 if (!imageFile.exists()) { 367 if (!imageFile.exists()) {
@@ -211,48 +369,17 @@ public class OcrUtil { @@ -211,48 +369,17 @@ public class OcrUtil {
211 return; 369 return;
212 } 370 }
213 371
214 - // 4. 图像预处理(直接处理原图,不保存临时文件)  
215 - System.out.println("正在进行图像预处理..."); 372 + // 手动测试(实际使用中应该通过 MultipartFile)
216 BufferedImage processedImage = preprocessImage(imageFile); 373 BufferedImage processedImage = preprocessImage(imageFile);
  374 + String processedPath = saveProcessedImage(processedImage, tempDir);
  375 + String result = performOcr(processedPath);
217 376
218 - // 5. 保存预处理后的图片到临时目录  
219 - String processedImagePath = "D:/temp/ocrJava/processed_" + System.currentTimeMillis() + ".png";  
220 - ImageIO.write(processedImage, "png", new File(processedImagePath));  
221 - System.out.println("预处理图片已保存: " + processedImagePath);  
222 -  
223 - // 6. 执行识别  
224 - System.out.println("开始识别图片...");  
225 - long startTime = System.currentTimeMillis();  
226 - OcrResult ocrResult = engine.runOcr(processedImagePath, config);  
227 - long endTime = System.currentTimeMillis();  
228 -  
229 - // 7. 输出结果  
230 - String text = ocrResult.getStrRes().trim();  
231 - System.out.println("\n==================================");  
232 - System.out.println("识别结果:");  
233 - System.out.println(text);  
234 - System.out.println("==================================");  
235 - System.out.println("识别耗时: " + (endTime - startTime) + " ms");  
236 -  
237 - // 8. 输出每个文本块  
238 - if (ocrResult.getTextBlocks() != null && !ocrResult.getTextBlocks().isEmpty()) {  
239 - System.out.println("\n文本块详情(共" + ocrResult.getTextBlocks().size() + "块):");  
240 - List<TextBlock> textBlocks = ocrResult.getTextBlocks();  
241 - for (int i = 0; i < textBlocks.size(); i++) {  
242 - TextBlock block = textBlocks.get(i);  
243 - System.out.printf(" 块%d: %s (置信度: %.2f)%n",  
244 - i + 1,  
245 - block.getText(),  
246 - block.getBoxScore()  
247 - );  
248 - }  
249 - } 377 + System.out.println("识别结果: " + result);
250 378
251 - // 9. 清理临时文件  
252 - new File(processedImagePath).delete(); 379 + // 清理
  380 + new File(processedPath).delete();
253 381
254 } catch (Exception e) { 382 } catch (Exception e) {
255 - System.err.println("OCR 识别失败: " + e.getMessage());  
256 e.printStackTrace(); 383 e.printStackTrace();
257 } 384 }
258 } 385 }
src/main/java/com/xly/ocr/web/OcrController.java
@@ -28,10 +28,10 @@ public class OcrController { @@ -28,10 +28,10 @@ public class OcrController {
28 return ResponseEntity.ok(dto); 28 return ResponseEntity.ok(dto);
29 } 29 }
30 30
31 - @PostMapping("/batch")  
32 - public ResponseEntity<List<String>> batchExtract(  
33 - @RequestParam("files") List<MultipartFile> files) {  
34 - List<String> results = ocrService.batchExtractText(files);  
35 - return ResponseEntity.ok(results);  
36 - } 31 +// @PostMapping("/batch")
  32 +// public ResponseEntity<List<String>> batchExtract(
  33 +// @RequestParam("files") List<MultipartFile> files) {
  34 +// List<String> results = ocrService.batchExtractText(files);
  35 +// return ResponseEntity.ok(results);
  36 +// }
37 } 37 }
38 \ No newline at end of file 38 \ No newline at end of file