Commit b436869234a3250081525b7be1f363b0adda3be4

Authored by qianbao
1 parent f2e11f8f

添加向量库

src/main/java/com/xly/ocr/service/OcrService.java
... ... @@ -2,21 +2,10 @@ package com.xly.ocr.service;
2 2  
3 3 import com.xly.ocr.util.OcrUtil;
4 4 import lombok.extern.slf4j.Slf4j;
5   -import net.sourceforge.tess4j.Tesseract;
6   -import net.sourceforge.tess4j.TesseractException;
7   -import org.slf4j.Logger;
8   -import org.slf4j.LoggerFactory;
9 5 import org.springframework.beans.factory.annotation.Value;
10 6 import org.springframework.stereotype.Service;
11 7 import org.springframework.web.multipart.MultipartFile;
12 8  
13   -import javax.imageio.ImageIO;
14   -import java.awt.*;
15   -import java.awt.image.BufferedImage;
16   -import java.io.File;
17   -import java.io.IOException;
18   -import java.nio.file.Files;
19   -import java.nio.file.Path;
20 9 import java.util.Arrays;
21 10 import java.util.List;
22 11  
... ... @@ -24,473 +13,64 @@ import java.util.List;
24 13 @Service("ocrService")
25 14 public class OcrService {
26 15  
27   - private static final Logger logger = LoggerFactory.getLogger(OcrService.class);
28   -
29 16 @Value("${ocr.tmpPath}")
30 17 private String tmpPath;
31 18  
32   -
33   - private final Tesseract tesseract;
34   -
35   - // 配置参数
36   - private static final List<String> ALLOWED_EXTENSIONS = Arrays.asList(".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".gif");
  19 + private static final List<String> ALLOWED_EXTENSIONS = Arrays.asList(
  20 + ".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".gif"
  21 + );
37 22 private static final long MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB
38   - private static final int BINARIZE_THRESHOLD = 127;
39   - private static final int MIN_WIDTH = 800;
40   - private static final int MIN_HEIGHT = 200;
41   -
42   - // 性能统计
43   - private static class OcrStats {
44   - long preprocessTime = 0;
45   - long ocrTime = 0;
46   - String imageSize = "";
47   -
48   - @Override
49   - public String toString() {
50   - return String.format("预处理耗时: %dms, OCR耗时: %dms, 图片尺寸: %s",
51   - preprocessTime, ocrTime, imageSize);
52   - }
53   - }
54   -
55   - public OcrService(@Value("${tesseract.datapath}") String dataPath) {
56   - this.tesseract = new Tesseract();
57   -
58   - // 基础配置
59   - this.tesseract.setDatapath(dataPath);
60   - this.tesseract.setLanguage("chi_sim+eng");
61   -
62   - // 优化识别参数
63   - configureTesseract();
64   -
65   - logger.info("Tesseract 初始化完成,语言包路径: {}, 语言: chi_sim+eng", dataPath);
66   - }
67   -
68   - /**
69   - * 配置 Tesseract 参数
70   - */
71   - private void configureTesseract() {
72   - // 页面分割模式:3 = 自动页面分割,但没有方向检测
73   - this.tesseract.setPageSegMode(3);
74   -
75   - // OCR 引擎模式:3 = 默认,基于 LSTM 和传统引擎
76   - this.tesseract.setOcrEngineMode(3);
77   -
78   - // 提高中文识别率
79   - this.tesseract.setVariable("preserve_interword_spaces", "1");
80   - this.tesseract.setVariable("textord_force_make_prop_words", "true");
81   -
82   - // 可选:设置字符白名单(根据需要启用)
83   - // this.tesseract.setVariable("tessedit_char_whitelist",
84   - // "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ,。!?;:\"‘’“”【】()《》");
85   -
86   - // 可选:设置黑名单(排除干扰字符)
87   - // this.tesseract.setVariable("tessedit_char_blacklist", "|\\/`~@#$%^&*()_+={}[]");
88   - }
89   -
90   - /**
91   - * 图片预处理 - 优化的处理流程
92   - */
93   - private BufferedImage preprocessImage(BufferedImage originalImage) {
94   - if (originalImage == null) {
95   - return null;
96   - }
97   -
98   - try {
99   - long startTime = System.currentTimeMillis();
100   -
101   - // 1. 自动调整亮度和对比度
102   - BufferedImage adjusted = autoAdjustBrightnessContrast(originalImage);
103   -
104   - // 2. 灰度化
105   - BufferedImage grayImage = toGray(adjusted);
106   -
107   - // 3. 自适应二值化(比固定阈值更好)
108   - BufferedImage binaryImage = adaptiveBinarize(grayImage);
109   -
110   - // 4. 降噪处理
111   - BufferedImage denoisedImage = denoise(binaryImage);
112   -
113   - // 5. 放大图片(如果太小)
114   - BufferedImage scaledImage = scaleImageIfNeeded(denoisedImage);
115   -
116   - // 6. 可选:边缘增强(提高清晰度)
117   - BufferedImage enhancedImage = sharpen(scaledImage);
118   -
119   - long endTime = System.currentTimeMillis();
120   - logger.debug("图片预处理耗时: {}ms", endTime - startTime);
121   -
122   - return enhancedImage;
123   -
124   - } catch (Exception e) {
125   - logger.error("图片预处理失败: {}", e.getMessage(), e);
126   - return originalImage;
127   - }
128   - }
129   -
130   - /**
131   - * 自动调整亮度和对比度
132   - */
133   - private BufferedImage autoAdjustBrightnessContrast(BufferedImage image) {
134   - BufferedImage result = new BufferedImage(
135   - image.getWidth(), image.getHeight(), image.getType());
136   -
137   - // 计算亮度直方图
138   - int[] histogram = new int[256];
139   - for (int y = 0; y < image.getHeight(); y++) {
140   - for (int x = 0; x < image.getWidth(); x++) {
141   - int rgb = image.getRGB(x, y);
142   - int gray = (int)((rgb >> 16 & 0xFF) * 0.299 +
143   - (rgb >> 8 & 0xFF) * 0.587 +
144   - (rgb & 0xFF) * 0.114);
145   - histogram[gray]++;
146   - }
147   - }
148   -
149   - // 找到黑色和白色的阈值
150   - int total = image.getWidth() * image.getHeight();
151   - int blackThreshold = 0;
152   - int whiteThreshold = 255;
153   -
154   - int sum = 0;
155   - for (int i = 0; i < 256; i++) {
156   - sum += histogram[i];
157   - if (sum > total * 0.05) {
158   - blackThreshold = i;
159   - break;
160   - }
161   - }
162   -
163   - sum = 0;
164   - for (int i = 255; i >= 0; i--) {
165   - sum += histogram[i];
166   - if (sum > total * 0.05) {
167   - whiteThreshold = i;
168   - break;
169   - }
170   - }
171   -
172   - // 应用对比度拉伸
173   - for (int y = 0; y < image.getHeight(); y++) {
174   - for (int x = 0; x < image.getWidth(); x++) {
175   - int rgb = image.getRGB(x, y);
176   - int r = (rgb >> 16) & 0xFF;
177   - int g = (rgb >> 8) & 0xFF;
178   - int b = rgb & 0xFF;
179   -
180   - // 拉伸到 0-255 范围
181   - r = stretchValue(r, blackThreshold, whiteThreshold);
182   - g = stretchValue(g, blackThreshold, whiteThreshold);
183   - b = stretchValue(b, blackThreshold, whiteThreshold);
184   -
185   - result.setRGB(x, y, (r << 16) | (g << 8) | b);
186   - }
187   - }
188   -
189   - return result;
190   - }
191   -
192   - private int stretchValue(int value, int black, int white) {
193   - if (value <= black) return 0;
194   - if (value >= white) return 255;
195   - return (value - black) * 255 / (white - black);
196   - }
197   -
198   - /**
199   - * 灰度化
200   - */
201   - private BufferedImage toGray(BufferedImage image) {
202   - BufferedImage result = new BufferedImage(
203   - image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
204   - Graphics g = result.getGraphics();
205   - g.drawImage(image, 0, 0, null);
206   - g.dispose();
207   - return result;
208   - }
209   -
210   - /**
211   - * 自适应二值化 - 根据局部区域动态调整阈值
212   - */
213   - private BufferedImage adaptiveBinarize(BufferedImage image) {
214   - BufferedImage result = new BufferedImage(
215   - image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY);
216   -
217   - int blockSize = 15;
218   - int constant = 5;
219   -
220   - for (int y = 0; y < image.getHeight(); y++) {
221   - for (int x = 0; x < image.getWidth(); x++) {
222   - // 计算局部区域的平均值
223   - int sum = 0;
224   - int count = 0;
225   - for (int ky = -blockSize/2; ky <= blockSize/2; ky++) {
226   - for (int kx = -blockSize/2; kx <= blockSize/2; kx++) {
227   - int px = Math.min(Math.max(x + kx, 0), image.getWidth() - 1);
228   - int py = Math.min(Math.max(y + ky, 0), image.getHeight() - 1);
229   - sum += new Color(image.getRGB(px, py)).getRed();
230   - count++;
231   - }
232   - }
233   - int threshold = sum / count - constant;
234   -
235   - // 应用阈值
236   - int gray = new Color(image.getRGB(x, y)).getRed();
237   - int binary = gray > threshold ? 255 : 0;
238   - result.setRGB(x, y, new Color(binary, binary, binary).getRGB());
239   - }
240   - }
241   - return result;
242   - }
243 23  
244 24 /**
245   - * 降噪 - 优化的中值滤波
  25 + * 从 MultipartFile 中提取文字
246 26 */
247   - private BufferedImage denoise(BufferedImage image) {
248   - BufferedImage result = new BufferedImage(
249   - image.getWidth(), image.getHeight(), image.getType());
250   -
251   - for (int y = 1; y < image.getHeight() - 1; y++) {
252   - for (int x = 1; x < image.getWidth() - 1; x++) {
253   - int[] neighbors = new int[9];
254   - int index = 0;
255   - for (int ky = -1; ky <= 1; ky++) {
256   - for (int kx = -1; kx <= 1; kx++) {
257   - neighbors[index++] = new Color(image.getRGB(x + kx, y + ky)).getRed();
258   - }
259   - }
260   - Arrays.sort(neighbors);
261   - int median = neighbors[4];
262   - result.setRGB(x, y, new Color(median, median, median).getRGB());
263   - }
264   - }
265   -
266   - // 处理边缘
267   - for (int x = 0; x < image.getWidth(); x++) {
268   - result.setRGB(x, 0, image.getRGB(x, 0));
269   - result.setRGB(x, image.getHeight() - 1, image.getRGB(x, image.getHeight() - 1));
270   - }
271   - for (int y = 0; y < image.getHeight(); y++) {
272   - result.setRGB(0, y, image.getRGB(0, y));
273   - result.setRGB(image.getWidth() - 1, y, image.getRGB(image.getWidth() - 1, y));
274   - }
275   -
276   - return result;
277   - }
278   -
279   - /**
280   - * 锐化处理 - 增强文字边缘
281   - */
282   - private BufferedImage sharpen(BufferedImage image) {
283   - BufferedImage result = new BufferedImage(
284   - image.getWidth(), image.getHeight(), image.getType());
285   -
286   - // 拉普拉斯锐化核
287   - float[] sharpenKernel = {
288   - 0, -1, 0,
289   - -1, 5, -1,
290   - 0, -1, 0
291   - };
292   -
293   - for (int y = 1; y < image.getHeight() - 1; y++) {
294   - for (int x = 1; x < image.getWidth() - 1; x++) {
295   - int sum = 0;
296   - int index = 0;
297   - for (int ky = -1; ky <= 1; ky++) {
298   - for (int kx = -1; kx <= 1; kx++) {
299   - int gray = new Color(image.getRGB(x + kx, y + ky)).getRed();
300   - sum += gray * sharpenKernel[index++];
301   - }
302   - }
303   - sum = Math.min(255, Math.max(0, sum));
304   - result.setRGB(x, y, new Color(sum, sum, sum).getRGB());
305   - }
306   - }
307   -
308   - return result;
309   - }
310   -
311   - /**
312   - * 放大图片(如果图片太小)
313   - */
314   - private BufferedImage scaleImageIfNeeded(BufferedImage image) {
315   - int width = image.getWidth();
316   - int height = image.getHeight();
317   -
318   - if (width >= MIN_WIDTH && height >= MIN_HEIGHT) {
319   - return image;
320   - }
321   -
322   - double scaleX = (double) MIN_WIDTH / width;
323   - double scaleY = (double) MIN_HEIGHT / height;
324   - double scale = Math.max(scaleX, scaleY);
325   -
326   - int newWidth = (int) (width * scale);
327   - int newHeight = (int) (height * scale);
328   -
329   - // 使用更好的插值算法
330   - BufferedImage result = new BufferedImage(newWidth, newHeight, image.getType());
331   - Graphics2D g2d = result.createGraphics();
332   - g2d.setRenderingHint(RenderingHints.KEY_INTERPOLATION,
333   - RenderingHints.VALUE_INTERPOLATION_BICUBIC);
334   - g2d.setRenderingHint(RenderingHints.KEY_RENDERING,
335   - RenderingHints.VALUE_RENDER_QUALITY);
336   - g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING,
337   - RenderingHints.VALUE_ANTIALIAS_ON);
338   - g2d.drawImage(image, 0, 0, newWidth, newHeight, null);
339   - g2d.dispose();
340   -
341   - logger.debug("图片已放大: {}x{} -> {}x{}", width, height, newWidth, newHeight);
342   - return result;
343   - }
344   -
345   - /**
346   - * 识别图片中的文字(增强版)
347   - */
348   - public String extractText(File imageFile) {
349   - if (imageFile == null || !imageFile.exists()) {
350   - logger.error("图片文件不存在或为空");
351   - return "图片文件不存在";
  27 + public String extractTextFromMultipartFile(MultipartFile file) {
  28 + // 1. 验证文件
  29 + String validationError = validateFile(file);
  30 + if (validationError != null) {
  31 + return validationError;
352 32 }
353 33  
354   - OcrStats stats = new OcrStats();
355   -
  34 + // 2. 调用 OCR 工具类识别
356 35 try {
357   - logger.info("开始识别图片: {}, 大小: {} bytes",
358   - imageFile.getAbsolutePath(), imageFile.length());
  36 + String result = OcrUtil.ocrFile(file, tmpPath);
359 37  
360   - // 读取原始图片
361   - long readStart = System.currentTimeMillis();
362   - BufferedImage originalImage = ImageIO.read(imageFile);
363   - if (originalImage == null) {
364   - return "无法读取图片文件,请确保图片格式正确";
365   - }
366   - stats.imageSize = originalImage.getWidth() + "x" + originalImage.getHeight();
367   -
368   - // 图片预处理
369   - long preprocessStart = System.currentTimeMillis();
370   - BufferedImage processedImage = preprocessImage(originalImage);
371   - stats.preprocessTime = System.currentTimeMillis() - preprocessStart;
372   -
373   - // 可选:保存预处理图片用于调试(生产环境可注释)
374   - if (logger.isDebugEnabled()) {
375   - saveDebugImage(processedImage, imageFile);
376   - }
377   -
378   - // 执行 OCR
379   - long ocrStart = System.currentTimeMillis();
380   - String result = tesseract.doOCR(processedImage);
381   - stats.ocrTime = System.currentTimeMillis() - ocrStart;
382   -
383   - logger.info("识别完成 - {}", stats);
384   -
385   - // 清理识别结果
386   - result = cleanResult(result);
387   -
388   - if (result.isEmpty()) {
389   - logger.warn("识别结果为空,可能需要调整预处理参数");
  38 + if (result == null || result.trim().isEmpty()) {
  39 + log.warn("未识别到文字内容,文件: {}", file.getOriginalFilename());
  40 + return "未识别到文字内容";
390 41 }
391 42  
  43 + log.info("识别成功,文件: {}, 文字长度: {}",
  44 + file.getOriginalFilename(), result.length());
392 45 return result;
393 46  
394   - } catch (TesseractException e) {
395   - logger.error("OCR识别失败: {}", e.getMessage(), e);
  47 + } catch (Exception e) {
  48 + log.error("OCR识别异常: {}", e.getMessage(), e);
396 49 return "OCR识别失败: " + e.getMessage();
397   - } catch (IOException e) {
398   - logger.error("读取图片失败: {}", e.getMessage(), e);
399   - return "读取图片失败: " + e.getMessage();
400   - }
401   - }
402   -
403   - /**
404   - * 保存调试图片(仅用于调试)
405   - */
406   - private void saveDebugImage(BufferedImage image, File originalFile) {
407   - try {
408   - String debugPath = originalFile.getParent() + "/debug_" + originalFile.getName();
409   - File debugFile = new File(debugPath);
410   - ImageIO.write(image, "png", debugFile);
411   - logger.debug("预处理图片已保存: {}", debugPath);
412   - } catch (IOException e) {
413   - logger.debug("保存调试图片失败: {}", e.getMessage());
414   - }
415   - }
416   -
417   - /**
418   - * 清理识别结果
419   - */
420   - private String cleanResult(String result) {
421   - if (result == null || result.isEmpty()) {
422   - return "";
423   - }
424   -
425   - // 去除首尾空白
426   - result = result.trim();
427   -
428   - // 规范化换行符
429   - result = result.replaceAll("\\r\\n", "\n")
430   - .replaceAll("\\r", "\n");
431   -
432   - // 合并多个空行
433   - result = result.replaceAll("\n{3,}", "\n\n");
434   -
435   - // 去除行首行尾空格
436   - String[] lines = result.split("\n");
437   - StringBuilder cleaned = new StringBuilder();
438   - for (String line : lines) {
439   - cleaned.append(line.trim()).append("\n");
440 50 }
441   -
442   - return cleaned.toString().trim();
443 51 }
444 52  
445 53 /**
446   - * 封装方法,接收上传的 MultipartFile
  54 + * 验证文件
447 55 */
448   - public String extractTextFromMultipartFile(MultipartFile file) {
  56 + private String validateFile(MultipartFile file) {
449 57 if (file == null || file.isEmpty()) {
450   - logger.warn("上传的文件为空");
  58 + log.warn("上传的文件为空");
451 59 return "上传的文件为空";
452 60 }
453 61  
454   - // 验证文件大小
455 62 if (file.getSize() > MAX_FILE_SIZE) {
456   - logger.warn("文件过大: {} bytes, 超过限制: {} bytes",
457   - file.getSize(), MAX_FILE_SIZE);
  63 + log.warn("文件过大: {} bytes", file.getSize());
458 64 return String.format("文件过大,最大支持 %dMB", MAX_FILE_SIZE / 1024 / 1024);
459 65 }
460 66  
461   - // 验证文件格式
462 67 String originalFilename = file.getOriginalFilename();
463 68 if (originalFilename != null && !isAllowedImage(originalFilename)) {
464   - logger.warn("不支持的文件格式: {}", originalFilename);
  69 + log.warn("不支持的文件格式: {}", originalFilename);
465 70 return "不支持的文件格式,仅支持: " + String.join(", ", ALLOWED_EXTENSIONS);
466 71 }
467   - String sText = OcrUtil.ocrFile(file,tmpPath);
468   - return sText;
469   - }
470   -
471   - /**
472   - * 清理临时文件
473   - */
474   - private void cleanupTempFile(Path tempFile) {
475   - if (tempFile != null) {
476   - try {
477   - Files.deleteIfExists(tempFile);
478   - logger.debug("临时文件已删除: {}", tempFile);
479   - } catch (IOException e) {
480   - logger.warn("删除临时文件失败: {}", tempFile, e);
481   - // 注册JVM退出时删除
482   - tempFile.toFile().deleteOnExit();
483   - }
484   - }
485   - }
486 72  
487   - /**
488   - * 批量识别(用于多张图片)
489   - */
490   - public List<String> batchExtractText(List<MultipartFile> files) {
491   - return files.stream()
492   - .map(this::extractTextFromMultipartFile)
493   - .collect(java.util.stream.Collectors.toList());
  73 + return null;
494 74 }
495 75  
496 76 /**
... ... @@ -504,14 +84,4 @@ public class OcrService {
504 84 return ALLOWED_EXTENSIONS.stream()
505 85 .anyMatch(lowerFilename::endsWith);
506 86 }
507   -
508   - /**
509   - * 获取文件扩展名
510   - */
511   - private String getFileExtension(String filename) {
512   - if (filename == null || !filename.contains(".")) {
513   - return ".jpg";
514   - }
515   - return filename.substring(filename.lastIndexOf("."));
516   - }
517 87 }
518 88 \ No newline at end of file
... ...
src/main/java/com/xly/ocr/util/OcrUtil.java
... ... @@ -16,107 +16,262 @@ import java.awt.image.BufferedImage;
16 16 import java.io.File;
17 17 import java.io.IOException;
18 18 import java.util.List;
  19 +import java.util.UUID;
19 20  
20 21 @Slf4j
21 22 public class OcrUtil {
22 23  
23   - static {
  24 + // 引擎实例(单例,避免重复初始化)
  25 + private static volatile InferenceEngine engine;
  26 + private static final Object LOCK = new Object();
  27 +
  28 + /**
  29 + * 获取 OCR 引擎实例(懒加载单例)
  30 + */
  31 + private static InferenceEngine getEngine() {
  32 + if (engine == null) {
  33 + synchronized (LOCK) {
  34 + if (engine == null) {
  35 + try {
  36 + log.info("初始化 OCR 引擎 (PP-OCRv4)...");
  37 + engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4);
  38 + log.info("OCR 引擎初始化成功");
  39 + } catch (Exception e) {
  40 + log.error("OCR 引擎初始化失败: {}", e.getMessage(), e);
  41 + throw new RuntimeException("OCR 引擎初始化失败", e);
  42 + }
  43 + }
  44 + }
  45 + }
  46 + return engine;
  47 + }
  48 +
  49 + /**
  50 + * 识别图片中的文字
  51 + * @param imageFile 上传的图片文件
  52 + * @param tempDir 临时目录路径
  53 + * @return 识别出的文字
  54 + */
  55 + public static String ocrFile(MultipartFile imageFile, String tempDir) {
  56 + File tempImageFile = null;
  57 + String processedImagePath = null;
  58 +
24 59 try {
25   - String customTempDir = "D:/temp/ocrJava";
26   - File tempDir = new File(customTempDir);
27   - if (!tempDir.exists()) {
28   - tempDir.mkdirs();
  60 + log.info("开始 OCR 识别,文件: {}", imageFile.getOriginalFilename());
  61 +
  62 + // 1. 验证输入
  63 + if (imageFile == null || imageFile.isEmpty()) {
  64 + log.warn("图片文件为空");
  65 + return StrUtil.EMPTY;
  66 + }
  67 +
  68 + // 2. 创建临时目录
  69 + ensureTempDirExists(tempDir);
  70 +
  71 + // 3. MultipartFile 转 File
  72 + tempImageFile = multipartFileToFile(imageFile, tempDir);
  73 + if (tempImageFile == null || !tempImageFile.exists()) {
  74 + log.error("转换临时文件失败");
  75 + return StrUtil.EMPTY;
  76 + }
  77 +
  78 + // 4. 图像预处理
  79 + BufferedImage processedImage = preprocessImage(tempImageFile);
  80 + if (processedImage == null) {
  81 + log.error("图像预处理失败");
  82 + return StrUtil.EMPTY;
  83 + }
  84 +
  85 + // 5. 保存预处理图片
  86 + processedImagePath = saveProcessedImage(processedImage, tempDir);
  87 + if (processedImagePath == null) {
  88 + log.error("保存预处理图片失败");
  89 + return StrUtil.EMPTY;
  90 + }
  91 +
  92 + // 6. 执行 OCR 识别
  93 + String text = performOcr(processedImagePath);
  94 +
  95 + // 7. 记录识别结果
  96 + if (StrUtil.isNotBlank(text)) {
  97 + log.info("OCR 识别成功,文字长度: {} 字符", text.length());
  98 + log.debug("识别结果: {}", text);
  99 + } else {
  100 + log.warn("OCR 识别结果为空");
29 101 }
30   - System.setProperty("java.io.tmpdir", customTempDir);
31   - System.setProperty("TMP", customTempDir);
32   - System.setProperty("TEMP", customTempDir);
33 102  
34   - System.out.println("==================================");
35   - System.out.println("临时目录: " + System.getProperty("java.io.tmpdir"));
36   - System.out.println("==================================");
  103 + return text;
37 104  
38 105 } catch (Exception e) {
39   - System.err.println("设置临时目录失败: " + e.getMessage());
  106 + log.error("OCR 识别失败: {}", e.getMessage(), e);
  107 + return StrUtil.EMPTY;
  108 + } finally {
  109 + // 清理临时文件
  110 + cleanupTempFiles(tempImageFile, processedImagePath);
40 111 }
41 112 }
42 113  
43   - public static String ocrFile(MultipartFile imageFile, String sTmpPath){
44   - String processedImagePath = StrUtil.EMPTY;
  114 + /**
  115 + * 确保临时目录存在
  116 + */
  117 + private static void ensureTempDirExists(String tempDir) {
  118 + if (StrUtil.isBlank(tempDir)) {
  119 + tempDir = System.getProperty("java.io.tmpdir");
  120 + }
  121 +
  122 + File dir = new File(tempDir);
  123 + if (!dir.exists()) {
  124 + boolean created = dir.mkdirs();
  125 + if (created) {
  126 + log.debug("创建临时目录: {}", tempDir);
  127 + } else {
  128 + log.warn("无法创建临时目录: {}", tempDir);
  129 + }
  130 + }
  131 + }
  132 +
  133 + /**
  134 + * MultipartFile 转 File
  135 + * @param multipartFile 上传文件
  136 + * @param tempDir 临时目录
  137 + * @return File 对象
  138 + */
  139 + public static File multipartFileToFile(MultipartFile multipartFile, String tempDir) throws IOException {
  140 + if (multipartFile == null || multipartFile.isEmpty()) {
  141 + return null;
  142 + }
  143 +
  144 + // 获取文件扩展名
  145 + String originalFilename = multipartFile.getOriginalFilename();
  146 + String extension = getFileExtension(originalFilename);
  147 +
  148 + // 生成唯一文件名
  149 + String uniqueFilename = UUID.randomUUID().toString() + extension;
  150 + String filePath = tempDir + File.separator + uniqueFilename;
  151 +
  152 + File file = new File(filePath);
  153 + multipartFile.transferTo(file);
  154 +
  155 + log.debug("创建临时文件: {}", filePath);
  156 + return file;
  157 + }
  158 +
  159 + /**
  160 + * 执行 OCR 识别
  161 + */
  162 + private static String performOcr(String imagePath) {
45 163 try {
46   - log.info("OCR 程序开始执行...");
47   - // 1. 初始化引擎(使用 v4 模型)
48   - log.info("正在初始化 OCR 引擎 (PP-OCRv4)...");
49   - InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4);
50   - // 2. 创建优化的参数配置
  164 + // 获取引擎实例
  165 + InferenceEngine engine = getEngine();
  166 +
  167 + // 创建参数配置
51 168 ParamConfig config = createOptimizedParamConfig();
52   - // 4. 图像预处理(直接处理原图,不保存临时文件)
53   - System.out.println("正在进行图像预处理...");
54   - File file = multipartFileToFile(imageFile);
55   - BufferedImage processedImage = preprocessImage(file);
56   - // 5. 保存预处理后的图片到临时目录
57   - if(!FileUtil.exist(sTmpPath)){
58   - FileUtil.mkdir(sTmpPath);
59   - }
60   - processedImagePath = sTmpPath+"/processed_" + System.currentTimeMillis() + ".png";
61   - ImageIO.write(processedImage, "png", new File(processedImagePath));
62   - log.info("预处理图片已保存: " + processedImagePath);
63   - // 6. 执行识别
64   - log.info("开始识别图片...");
  169 +
  170 + // 执行识别
65 171 long startTime = System.currentTimeMillis();
66   - OcrResult ocrResult = engine.runOcr(processedImagePath, config);
  172 + OcrResult ocrResult = engine.runOcr(imagePath, config);
67 173 long endTime = System.currentTimeMillis();
68   - // 7. 输出结果
69   - String text = ocrResult.getStrRes().trim();
70   - log.info("\n==================================");
71   - log.info("识别结果:");
72   - log.info(text);
73   - log.info("==================================");
74   - log.info("识别耗时: " + (endTime - startTime) + " ms");
75   - // 8. 输出每个文本块
76   -// if (ocrResult.getTextBlocks() != null && !ocrResult.getTextBlocks().isEmpty()) {
77   -// System.out.println("\n文本块详情(共" + ocrResult.getTextBlocks().size() + "块):");
78   -// List<TextBlock> textBlocks = ocrResult.getTextBlocks();
79   -// for (int i = 0; i < textBlocks.size(); i++) {
80   -// TextBlock block = textBlocks.get(i);
81   -// System.out.printf(" 块%d: %s (置信度: %.2f)%n",
82   -// i + 1,
83   -// block.getText(),
84   -// block.getBoxScore()
85   -// );
86   -// }
87   -// }
88   - return text;
  174 +
  175 + log.info("OCR 识别耗时: {} ms", (endTime - startTime));
  176 +
  177 + // 输出文本块详情(DEBUG 级别)
  178 + if (log.isDebugEnabled() && ocrResult.getTextBlocks() != null) {
  179 + List<TextBlock> textBlocks = ocrResult.getTextBlocks();
  180 + log.debug("识别到 {} 个文本块", textBlocks.size());
  181 + for (int i = 0; i < textBlocks.size(); i++) {
  182 + TextBlock block = textBlocks.get(i);
  183 + log.debug(" 块{}: {} (置信度: {})",
  184 + i + 1, block.getText(), block.getBoxScore());
  185 + }
  186 + }
  187 +
  188 + return ocrResult.getStrRes().trim();
89 189  
90 190 } catch (Exception e) {
91   - System.err.println("OCR 识别失败: " + e.getMessage());
92   - e.printStackTrace();
93   - }finally {
94   - // 9. 清理临时文件
95   - FileUtil.del(processedImagePath);
  191 + log.error("执行 OCR 识别失败: {}", e.getMessage(), e);
  192 + return StrUtil.EMPTY;
  193 + }
  194 + }
  195 +
  196 + /**
  197 + * 保存预处理后的图片
  198 + */
  199 + private static String saveProcessedImage(BufferedImage image, String tempDir) throws IOException {
  200 + if (image == null) {
  201 + return null;
96 202 }
97   - return StrUtil.EMPTY;
  203 +
  204 + String filename = "processed_" + System.currentTimeMillis() + "_" + UUID.randomUUID().toString() + ".png";
  205 + String filePath = tempDir + File.separator + filename;
  206 +
  207 + File outputFile = new File(filePath);
  208 + ImageIO.write(image, "png", outputFile);
  209 +
  210 + log.debug("保存预处理图片: {}", filePath);
  211 + return filePath;
98 212 }
99 213  
  214 + /**
  215 + * 清理临时文件
  216 + */
  217 + private static void cleanupTempFiles(File tempImageFile, String processedImagePath) {
  218 + // 清理原始临时文件
  219 + if (tempImageFile != null && tempImageFile.exists()) {
  220 + boolean deleted = tempImageFile.delete();
  221 + if (deleted) {
  222 + log.debug("删除临时文件: {}", tempImageFile.getPath());
  223 + } else {
  224 + log.warn("删除临时文件失败: {}", tempImageFile.getPath());
  225 + tempImageFile.deleteOnExit();
  226 + }
  227 + }
  228 +
  229 + // 清理预处理图片
  230 + if (StrUtil.isNotBlank(processedImagePath)) {
  231 + File processedFile = new File(processedImagePath);
  232 + if (processedFile.exists()) {
  233 + boolean deleted = processedFile.delete();
  234 + if (deleted) {
  235 + log.debug("删除预处理图片: {}", processedImagePath);
  236 + } else {
  237 + log.warn("删除预处理图片失败: {}", processedImagePath);
  238 + processedFile.deleteOnExit();
  239 + }
  240 + }
  241 + }
  242 + }
100 243  
101 244 /**
102   - * 优化的参数配置
  245 + * 创建优化的参数配置
103 246 */
104 247 private static ParamConfig createOptimizedParamConfig() {
105 248 ParamConfig config = new ParamConfig();
106 249  
  250 + // 文本区域扩展
107 251 config.setPadding(50);
  252 +
  253 + // 最大边长限制(0 表示不限制)
108 254 config.setMaxSideLen(0);
  255 +
  256 + // 文本块置信度阈值
109 257 config.setBoxScoreThresh(0.4f);
110 258 config.setBoxThresh(0.25f);
  259 +
  260 + // 文本区域扩展比例
111 261 config.setUnClipRatio(1.8f);
  262 +
  263 + // 角度检测
112 264 config.setDoAngle(true);
113 265 config.setMostAngle(true);
114 266  
  267 + log.debug("OCR 参数配置: padding={}, unClipRatio={}",
  268 + config.getPadding(), config.getUnClipRatio());
  269 +
115 270 return config;
116 271 }
117 272  
118 273 /**
119   - * 图像预处理 - 直接返回处理后的 BufferedImage
  274 + * 图像预处理
120 275 */
121 276 private static BufferedImage preprocessImage(File imageFile) throws IOException {
122 277 BufferedImage original = ImageIO.read(imageFile);
... ... @@ -124,36 +279,21 @@ public class OcrUtil {
124 279 throw new IOException("无法读取图片: " + imageFile.getPath());
125 280 }
126 281  
127   - System.out.println("原始尺寸: " + original.getWidth() + "x" + original.getHeight());
  282 + log.debug("原始图片尺寸: {}x{}", original.getWidth(), original.getHeight());
128 283  
129 284 BufferedImage processed = original;
130 285  
131 286 // 1. 如果图片太大,缩小尺寸
132 287 if (processed.getWidth() > 2000 || processed.getHeight() > 2000) {
133 288 processed = resizeImage(processed, 1600, 1600);
  289 + log.debug("缩小图片尺寸: {}x{}", processed.getWidth(), processed.getHeight());
134 290 }
135 291  
136 292 // 2. 增强对比度
137 293 processed = enhanceContrast(processed);
138 294  
139   - System.out.println("处理后尺寸: " + processed.getWidth() + "x" + processed.getHeight());
140   -
141 295 return processed;
142 296 }
143   - /***
144   - * @Author 钱豹
145   - * @Date 11:01 2026/4/1
146   - * @Param [multipartFile]
147   - * @return java.io.File
148   - * @Description 图片对象转换
149   - **/
150   - public static File multipartFileToFile(MultipartFile multipartFile) throws IOException {
151   - // 创建临时文件
152   - File file = File.createTempFile("temp", null);
153   - // 将 MultipartFile 的内容传输到 File
154   - multipartFile.transferTo(file);
155   - return file;
156   - }
157 297  
158 298 /**
159 299 * 调整图片大小
... ... @@ -161,8 +301,12 @@ public class OcrUtil {
161 301 private static BufferedImage resizeImage(BufferedImage image, int maxWidth, int maxHeight) {
162 302 int w = image.getWidth();
163 303 int h = image.getHeight();
  304 +
  305 + // 计算缩放比例
164 306 double ratio = Math.min((double) maxWidth / w, (double) maxHeight / h);
165   - if (ratio >= 1) return image;
  307 + if (ratio >= 1.0) {
  308 + return image;
  309 + }
166 310  
167 311 int newW = (int) (w * ratio);
168 312 int newH = (int) (h * ratio);
... ... @@ -170,8 +314,10 @@ public class OcrUtil {
170 314 BufferedImage resized = new BufferedImage(newW, newH, BufferedImage.TYPE_INT_RGB);
171 315 Graphics2D g = resized.createGraphics();
172 316 g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
  317 + g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
173 318 g.drawImage(image, 0, 0, newW, newH, null);
174 319 g.dispose();
  320 +
175 321 return resized;
176 322 }
177 323  
... ... @@ -180,6 +326,7 @@ public class OcrUtil {
180 326 */
181 327 private static BufferedImage enhanceContrast(BufferedImage image) {
182 328 BufferedImage result = new BufferedImage(image.getWidth(), image.getHeight(), image.getType());
  329 +
183 330 for (int y = 0; y < image.getHeight(); y++) {
184 331 for (int x = 0; x < image.getWidth(); x++) {
185 332 Color c = new Color(image.getRGB(x, y));
... ... @@ -189,21 +336,32 @@ public class OcrUtil {
189 336 result.setRGB(x, y, new Color(r, g, b).getRGB());
190 337 }
191 338 }
  339 +
192 340 return result;
193 341 }
194 342  
195   - public static void main(String[] args) {
196   - try {
197   - System.out.println("OCR 程序开始执行...");
198   -
199   - // 1. 初始化引擎(使用 v4 模型)
200   - System.out.println("正在初始化 OCR 引擎 (PP-OCRv4)...");
201   - InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4);
  343 + /**
  344 + * 获取文件扩展名
  345 + */
  346 + private static String getFileExtension(String filename) {
  347 + if (StrUtil.isBlank(filename)) {
  348 + return ".jpg";
  349 + }
  350 + int lastDotIndex = filename.lastIndexOf(".");
  351 + if (lastDotIndex == -1) {
  352 + return ".jpg";
  353 + }
  354 + return filename.substring(lastDotIndex);
  355 + }
202 356  
203   - // 2. 创建优化的参数配置
204   - ParamConfig config = createOptimizedParamConfig();
  357 + /**
  358 + * 测试方法
  359 + */
  360 + public static void main(String[] args) {
  361 + String tempDir = "D:/temp/ocrJava";
205 362  
206   - // 3. 图片路径
  363 + // 测试识别
  364 + try {
207 365 String imagePath = "E:/aa/b.jpg";
208 366 File imageFile = new File(imagePath);
209 367 if (!imageFile.exists()) {
... ... @@ -211,48 +369,17 @@ public class OcrUtil {
211 369 return;
212 370 }
213 371  
214   - // 4. 图像预处理(直接处理原图,不保存临时文件)
215   - System.out.println("正在进行图像预处理...");
  372 + // 手动测试(实际使用中应该通过 MultipartFile)
216 373 BufferedImage processedImage = preprocessImage(imageFile);
  374 + String processedPath = saveProcessedImage(processedImage, tempDir);
  375 + String result = performOcr(processedPath);
217 376  
218   - // 5. 保存预处理后的图片到临时目录
219   - String processedImagePath = "D:/temp/ocrJava/processed_" + System.currentTimeMillis() + ".png";
220   - ImageIO.write(processedImage, "png", new File(processedImagePath));
221   - System.out.println("预处理图片已保存: " + processedImagePath);
222   -
223   - // 6. 执行识别
224   - System.out.println("开始识别图片...");
225   - long startTime = System.currentTimeMillis();
226   - OcrResult ocrResult = engine.runOcr(processedImagePath, config);
227   - long endTime = System.currentTimeMillis();
228   -
229   - // 7. 输出结果
230   - String text = ocrResult.getStrRes().trim();
231   - System.out.println("\n==================================");
232   - System.out.println("识别结果:");
233   - System.out.println(text);
234   - System.out.println("==================================");
235   - System.out.println("识别耗时: " + (endTime - startTime) + " ms");
236   -
237   - // 8. 输出每个文本块
238   - if (ocrResult.getTextBlocks() != null && !ocrResult.getTextBlocks().isEmpty()) {
239   - System.out.println("\n文本块详情(共" + ocrResult.getTextBlocks().size() + "块):");
240   - List<TextBlock> textBlocks = ocrResult.getTextBlocks();
241   - for (int i = 0; i < textBlocks.size(); i++) {
242   - TextBlock block = textBlocks.get(i);
243   - System.out.printf(" 块%d: %s (置信度: %.2f)%n",
244   - i + 1,
245   - block.getText(),
246   - block.getBoxScore()
247   - );
248   - }
249   - }
  377 + System.out.println("识别结果: " + result);
250 378  
251   - // 9. 清理临时文件
252   - new File(processedImagePath).delete();
  379 + // 清理
  380 + new File(processedPath).delete();
253 381  
254 382 } catch (Exception e) {
255   - System.err.println("OCR 识别失败: " + e.getMessage());
256 383 e.printStackTrace();
257 384 }
258 385 }
... ...
src/main/java/com/xly/ocr/web/OcrController.java
... ... @@ -28,10 +28,10 @@ public class OcrController {
28 28 return ResponseEntity.ok(dto);
29 29 }
30 30  
31   - @PostMapping("/batch")
32   - public ResponseEntity<List<String>> batchExtract(
33   - @RequestParam("files") List<MultipartFile> files) {
34   - List<String> results = ocrService.batchExtractText(files);
35   - return ResponseEntity.ok(results);
36   - }
  31 +// @PostMapping("/batch")
  32 +// public ResponseEntity<List<String>> batchExtract(
  33 +// @RequestParam("files") List<MultipartFile> files) {
  34 +// List<String> results = ocrService.batchExtractText(files);
  35 +// return ResponseEntity.ok(results);
  36 +// }
37 37 }
38 38 \ No newline at end of file
... ...