package com.xly.ocr.util;

import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.StrUtil;
import com.benjaminwan.ocrlibrary.OcrResult;
import com.benjaminwan.ocrlibrary.TextBlock;
import io.github.mymonstercat.Model;
import io.github.mymonstercat.ocr.InferenceEngine;
import io.github.mymonstercat.ocr.config.ParamConfig;
import lombok.extern.slf4j.Slf4j;
import org.springframework.web.multipart.MultipartFile;

import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.List;

/**
 * Static helpers that run OCR (PP-OCRv4 via {@link InferenceEngine}) on an image.
 *
 * <p>The pipeline is: copy the upload to a temporary file, pre-process it (optional
 * down-scaling plus a brightness gain), write the result as PNG, hand that PNG to the
 * engine, and finally remove every temporary file that was created.
 *
 * <p>Loading this class redirects the JVM-wide {@code java.io.tmpdir} (see the static
 * initializer), which affects every other user of temporary files in the same process.
 */
@Slf4j
public class OcrUtil {

    /** System property that overrides the temporary directory used by this class. */
    private static final String TEMP_DIR_PROPERTY = "ocr.temp.dir";

    /** Temporary directory used when {@link #TEMP_DIR_PROPERTY} is not set. */
    private static final String DEFAULT_TEMP_DIR = "D:/temp/ocrJava";

    /** Effective temporary directory for this JVM. */
    private static final String TEMP_DIR = System.getProperty(TEMP_DIR_PROPERTY, DEFAULT_TEMP_DIR);

    /** Images with a side longer than this are scaled down before recognition. */
    private static final int RESIZE_THRESHOLD = 2000;

    /** Bounding box (width and height) that over-sized images are scaled into. */
    private static final int RESIZE_TARGET = 1600;

    /** Factor applied to every colour channel by {@link #enhanceContrast(BufferedImage)}. */
    private static final double CHANNEL_GAIN = 1.15;

    static {
        // Plain System.out/System.err is kept here on purpose: this runs during class
        // initialization and should not depend on anything else being set up.
        try {
            File tempDir = new File(TEMP_DIR);
            // Only redirect the temp directory when it is actually usable; pointing
            // java.io.tmpdir at a missing directory breaks File.createTempFile JVM-wide.
            if (tempDir.isDirectory() || tempDir.mkdirs()) {
                System.setProperty("java.io.tmpdir", TEMP_DIR);
                // NOTE(review): TMP/TEMP are presumably read by a native component - confirm.
                System.setProperty("TMP", TEMP_DIR);
                System.setProperty("TEMP", TEMP_DIR);
                System.out.println("==================================");
                System.out.println("临时目录: " + System.getProperty("java.io.tmpdir"));
                System.out.println("==================================");
            } else {
                System.err.println("设置临时目录失败: " + TEMP_DIR);
            }
        } catch (Exception e) {
            System.err.println("设置临时目录失败: " + e.getMessage());
        }
    }

    /**
     * Recognizes the text contained in an uploaded image.
     *
     * @param imageFile uploaded image; {@code null} or empty uploads yield an empty result
     * @param sTmpPath  directory for the intermediate PNG; created when missing, and
     *                  replaced by {@code java.io.tmpdir} when {@code null} or blank
     * @return the trimmed recognized text, or an empty string when recognition fails
     */
    public static String ocrFile(MultipartFile imageFile, String sTmpPath) {
        if (imageFile == null || imageFile.isEmpty()) {
            log.warn("上传的图片为空");
            return StrUtil.EMPTY;
        }

        File uploadedFile = null;
        File processedFile = null;
        try {
            log.info("OCR 程序开始执行...");

            // 1. Obtain the engine (v4 model).
            log.info("正在初始化 OCR 引擎 (PP-OCRv4)...");
            InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4);

            // 2. Build the recognition parameters.
            ParamConfig config = createOptimizedParamConfig();

            // 3. Pre-process the upload.
            log.info("正在进行图像预处理...");
            uploadedFile = multipartFileToFile(imageFile);
            BufferedImage processedImage = preprocessImage(uploadedFile);

            // 4. Persist the pre-processed image; the engine is given a file path.
            processedFile = writeProcessedImage(processedImage, sTmpPath);
            log.info("预处理图片已保存: {}", processedFile.getPath());

            // 5. Run recognition.
            log.info("开始识别图片...");
            long startTime = System.currentTimeMillis();
            OcrResult ocrResult = engine.runOcr(processedFile.getPath(), config);
            long endTime = System.currentTimeMillis();

            // 6. Report the result.
            String text = ocrResult.getStrRes().trim();
            log.info("\n==================================");
            log.info("识别结果:");
            log.info("{}", text);
            log.info("==================================");
            log.info("识别耗时: {} ms", endTime - startTime);
            return text;
        } catch (Exception e) {
            // Boundary catch: callers receive an empty string instead of an exception.
            log.error("OCR 识别失败: {}", e.getMessage(), e);
            return StrUtil.EMPTY;
        } finally {
            // 7. Remove both temporary files. Only files that were really created are
            // touched, so a failure early in the pipeline can never delete anything else.
            deleteQuietly(processedFile);
            deleteQuietly(uploadedFile);
        }
    }

    /**
     * Builds the parameter set passed to the engine.
     *
     * <p>NOTE(review): the values are empirical tuning constants; their exact meaning is
     * defined by the OCR library and should be checked there before changing them.
     *
     * @return a freshly populated configuration
     */
    private static ParamConfig createOptimizedParamConfig() {
        ParamConfig config = new ParamConfig();
        config.setPadding(50);
        config.setMaxSideLen(0);
        config.setBoxScoreThresh(0.4f);
        config.setBoxThresh(0.25f);
        config.setUnClipRatio(1.8f);
        config.setDoAngle(true);
        config.setMostAngle(true);
        return config;
    }

    /**
     * Loads an image and prepares it for recognition.
     *
     * <p>Images with a side longer than {@value #RESIZE_THRESHOLD} px are scaled to fit
     * a {@value #RESIZE_TARGET} px box; afterwards the channel gain is applied.
     *
     * @param imageFile image to load
     * @return the pre-processed image
     * @throws IOException if the file cannot be decoded as an image
     */
    private static BufferedImage preprocessImage(File imageFile) throws IOException {
        BufferedImage original = ImageIO.read(imageFile);
        if (original == null) {
            // ImageIO.read returns null when no registered reader understands the data.
            throw new IOException("无法读取图片: " + imageFile.getPath());
        }
        log.info("原始尺寸: {}x{}", original.getWidth(), original.getHeight());

        BufferedImage processed = original;

        // 1. Shrink over-sized images.
        if (processed.getWidth() > RESIZE_THRESHOLD || processed.getHeight() > RESIZE_THRESHOLD) {
            processed = resizeImage(processed, RESIZE_TARGET, RESIZE_TARGET);
        }

        // 2. Apply the channel gain.
        processed = enhanceContrast(processed);

        log.info("处理后尺寸: {}x{}", processed.getWidth(), processed.getHeight());
        return processed;
    }

    /**
     * Copies an upload into a new temporary file.
     *
     * <p>The caller owns the returned file and must delete it when done.
     *
     * @author 钱豹
     * @param multipartFile upload to copy
     * @return the temporary file holding the upload's content
     * @throws IOException if the temporary file cannot be created or written
     */
    public static File multipartFileToFile(MultipartFile multipartFile) throws IOException {
        File file = File.createTempFile("temp", null);
        try {
            multipartFile.transferTo(file);
        } catch (IOException | RuntimeException e) {
            // Do not leave an orphaned temp file behind when the copy fails.
            deleteQuietly(file);
            throw e;
        }
        return file;
    }

    /**
     * Scales an image down so that it fits inside the given box, keeping its aspect ratio.
     *
     * @param image     image to scale
     * @param maxWidth  maximum width of the result
     * @param maxHeight maximum height of the result
     * @return the scaled copy, or {@code image} itself when it already fits
     */
    private static BufferedImage resizeImage(BufferedImage image, int maxWidth, int maxHeight) {
        int w = image.getWidth();
        int h = image.getHeight();
        double ratio = Math.min((double) maxWidth / w, (double) maxHeight / h);
        if (ratio >= 1) {
            return image;
        }

        // Clamp to 1 px: for extreme aspect ratios the truncated size can reach 0,
        // which the BufferedImage constructor rejects.
        int newW = Math.max(1, (int) (w * ratio));
        int newH = Math.max(1, (int) (h * ratio));

        BufferedImage resized = new BufferedImage(newW, newH, BufferedImage.TYPE_INT_RGB);
        Graphics2D g = resized.createGraphics();
        try {
            g.setRenderingHint(RenderingHints.KEY_INTERPOLATION,
                    RenderingHints.VALUE_INTERPOLATION_BICUBIC);
            g.drawImage(image, 0, 0, newW, newH, null);
        } finally {
            g.dispose();
        }
        return resized;
    }

    /**
     * Multiplies the red, green and blue channel of every pixel by
     * {@value #CHANNEL_GAIN}, clamping at 255. Every output pixel is written fully opaque.
     *
     * @param image source image; not modified
     * @return a new image of the same size
     */
    private static BufferedImage enhanceContrast(BufferedImage image) {
        int width = image.getWidth();
        int height = image.getHeight();

        // TYPE_CUSTOM (0) is not accepted by the BufferedImage(int, int, int) constructor,
        // but ImageIO may return such images; fall back to plain RGB for them.
        int type = image.getType() == BufferedImage.TYPE_CUSTOM
                ? BufferedImage.TYPE_INT_RGB
                : image.getType();

        BufferedImage result = new BufferedImage(width, height, type);
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                int argb = image.getRGB(x, y);
                int r = Math.min(255, (int) (((argb >> 16) & 0xFF) * CHANNEL_GAIN));
                int g = Math.min(255, (int) (((argb >> 8) & 0xFF) * CHANNEL_GAIN));
                int b = Math.min(255, (int) ((argb & 0xFF) * CHANNEL_GAIN));
                result.setRGB(x, y, 0xFF000000 | (r << 16) | (g << 8) | b);
            }
        }
        return result;
    }

    /**
     * Writes an image as PNG into a uniquely named file inside {@code directory}.
     *
     * @param image     image to write
     * @param directory target directory; created when missing, {@code java.io.tmpdir}
     *                  is used when {@code null} or blank
     * @return the file that was written
     * @throws IOException if the directory or the file cannot be created or written
     */
    private static File writeProcessedImage(BufferedImage image, String directory) throws IOException {
        File dir = new File(StrUtil.isBlank(directory)
                ? System.getProperty("java.io.tmpdir")
                : directory);
        // The second isDirectory() check covers another thread creating it concurrently.
        if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("无法创建临时目录: " + dir.getPath());
        }

        // createTempFile guarantees a unique name, so concurrent calls cannot collide.
        File target = File.createTempFile("processed_", ".png", dir);
        boolean written = false;
        try {
            written = ImageIO.write(image, "png", target);
            if (!written) {
                // ImageIO.write returns false when no suitable writer is available.
                throw new IOException("无法保存预处理图片: " + target.getPath());
            }
            return target;
        } finally {
            if (!written) {
                deleteQuietly(target);
            }
        }
    }

    /**
     * Deletes a file on a best-effort basis; a failure is logged, never thrown.
     *
     * @param file file to delete; {@code null} and missing files are ignored
     */
    private static void deleteQuietly(File file) {
        if (file != null && file.exists() && !file.delete()) {
            log.warn("临时文件删除失败: {}", file.getPath());
        }
    }

    /**
     * Command-line demo: recognizes one image and prints the text plus every text block.
     *
     * @param args optional; {@code args[0]} is the image path (defaults to {@code E:/aa/b.jpg})
     */
    public static void main(String[] args) {
        String imagePath = (args != null && args.length > 0) ? args[0] : "E:/aa/b.jpg";
        File processedFile = null;
        try {
            System.out.println("OCR 程序开始执行...");

            // 1. Obtain the engine (v4 model).
            System.out.println("正在初始化 OCR 引擎 (PP-OCRv4)...");
            InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4);

            // 2. Build the recognition parameters.
            ParamConfig config = createOptimizedParamConfig();

            // 3. Locate the input image.
            File imageFile = new File(imagePath);
            if (!imageFile.exists()) {
                System.err.println("图片文件不存在: " + imagePath);
                return;
            }

            // 4. Pre-process the image.
            System.out.println("正在进行图像预处理...");
            BufferedImage processedImage = preprocessImage(imageFile);

            // 5. Persist the pre-processed image in the temporary directory.
            processedFile = writeProcessedImage(processedImage, TEMP_DIR);
            System.out.println("预处理图片已保存: " + processedFile.getPath());

            // 6. Run recognition.
            System.out.println("开始识别图片...");
            long startTime = System.currentTimeMillis();
            OcrResult ocrResult = engine.runOcr(processedFile.getPath(), config);
            long endTime = System.currentTimeMillis();

            // 7. Print the result.
            String text = ocrResult.getStrRes().trim();
            System.out.println("\n==================================");
            System.out.println("识别结果:");
            System.out.println(text);
            System.out.println("==================================");
            System.out.println("识别耗时: " + (endTime - startTime) + " ms");

            // 8. Print every text block.
            List<TextBlock> textBlocks = ocrResult.getTextBlocks();
            if (textBlocks != null && !textBlocks.isEmpty()) {
                System.out.println("\n文本块详情(共" + textBlocks.size() + "块):");
                for (int i = 0; i < textBlocks.size(); i++) {
                    TextBlock block = textBlocks.get(i);
                    System.out.printf("  块%d: %s (置信度: %.2f)%n",
                            i + 1,
                            block.getText(),
                            block.getBoxScore()
                    );
                }
            }
        } catch (Exception e) {
            System.err.println("OCR 识别失败: " + e.getMessage());
            e.printStackTrace();
        } finally {
            // 9. Remove the temporary file even when recognition failed.
            deleteQuietly(processedFile);
        }
    }
}