// OcrUtil.java 10.1 KB
package com.xly.ocr.util;

import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.StrUtil;
import com.benjaminwan.ocrlibrary.OcrResult;
import com.benjaminwan.ocrlibrary.TextBlock;
import io.github.mymonstercat.Model;
import io.github.mymonstercat.ocr.InferenceEngine;
import io.github.mymonstercat.ocr.config.ParamConfig;
import lombok.extern.slf4j.Slf4j;
import org.springframework.web.multipart.MultipartFile;

import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.List;

@Slf4j
public class OcrUtil {

    static {
        try {
            String customTempDir = "D:/temp/ocrJava";
            File tempDir = new File(customTempDir);
            if (!tempDir.exists()) {
                tempDir.mkdirs();
            }
            System.setProperty("java.io.tmpdir", customTempDir);
            System.setProperty("TMP", customTempDir);
            System.setProperty("TEMP", customTempDir);

            System.out.println("==================================");
            System.out.println("临时目录: " + System.getProperty("java.io.tmpdir"));
            System.out.println("==================================");

        } catch (Exception e) {
            System.err.println("设置临时目录失败: " + e.getMessage());
        }
    }

    public static String ocrFile(MultipartFile imageFile, String sTmpPath){
        String processedImagePath = StrUtil.EMPTY;
        try {
            log.info("OCR 程序开始执行...");
            // 1. 初始化引擎(使用 v4 模型)
            log.info("正在初始化 OCR 引擎 (PP-OCRv4)...");
            InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4);
            // 2. 创建优化的参数配置
            ParamConfig config = createOptimizedParamConfig();
            // 4. 图像预处理(直接处理原图,不保存临时文件)
            System.out.println("正在进行图像预处理...");
            File file = multipartFileToFile(imageFile);
            BufferedImage processedImage = preprocessImage(file);
            // 5. 保存预处理后的图片到临时目录
            if(!FileUtil.exist(sTmpPath)){
                FileUtil.mkdir(sTmpPath);
            }
            processedImagePath = sTmpPath+"/processed_" + System.currentTimeMillis() + ".png";
            ImageIO.write(processedImage, "png", new File(processedImagePath));
            log.info("预处理图片已保存: " + processedImagePath);
            // 6. 执行识别
            log.info("开始识别图片...");
            long startTime = System.currentTimeMillis();
            OcrResult ocrResult = engine.runOcr(processedImagePath, config);
            long endTime = System.currentTimeMillis();
            // 7. 输出结果
            String text = ocrResult.getStrRes().trim();
            log.info("\n==================================");
            log.info("识别结果:");
            log.info(text);
            log.info("==================================");
            log.info("识别耗时: " + (endTime - startTime) + " ms");
            // 8. 输出每个文本块
//            if (ocrResult.getTextBlocks() != null && !ocrResult.getTextBlocks().isEmpty()) {
//                System.out.println("\n文本块详情(共" + ocrResult.getTextBlocks().size() + "块):");
//                List<TextBlock> textBlocks = ocrResult.getTextBlocks();
//                for (int i = 0; i < textBlocks.size(); i++) {
//                    TextBlock block = textBlocks.get(i);
//                    System.out.printf("  块%d: %s (置信度: %.2f)%n",
//                            i + 1,
//                            block.getText(),
//                            block.getBoxScore()
//                    );
//                }
//            }
            return text;

        } catch (Exception e) {
            System.err.println("OCR 识别失败: " + e.getMessage());
            e.printStackTrace();
        }finally {
            // 9. 清理临时文件
            FileUtil.del(processedImagePath);
        }
        return StrUtil.EMPTY;
    }


    /**
     * 优化的参数配置
     */
    private static ParamConfig createOptimizedParamConfig() {
        ParamConfig config = new ParamConfig();

        config.setPadding(50);
        config.setMaxSideLen(0);
        config.setBoxScoreThresh(0.4f);
        config.setBoxThresh(0.25f);
        config.setUnClipRatio(1.8f);
        config.setDoAngle(true);
        config.setMostAngle(true);

        return config;
    }

    /**
     * 图像预处理 - 直接返回处理后的 BufferedImage
     */
    private static BufferedImage preprocessImage(File imageFile) throws IOException {
        BufferedImage original = ImageIO.read(imageFile);
        if (original == null) {
            throw new IOException("无法读取图片: " + imageFile.getPath());
        }

        System.out.println("原始尺寸: " + original.getWidth() + "x" + original.getHeight());

        BufferedImage processed = original;

        // 1. 如果图片太大,缩小尺寸
        if (processed.getWidth() > 2000 || processed.getHeight() > 2000) {
            processed = resizeImage(processed, 1600, 1600);
        }

        // 2. 增强对比度
        processed = enhanceContrast(processed);

        System.out.println("处理后尺寸: " + processed.getWidth() + "x" + processed.getHeight());

        return processed;
    }
    /***
     * @Author 钱豹
     * @Date 11:01 2026/4/1
     * @Param [multipartFile]
     * @return java.io.File
     * @Description 图片对象转换
     **/
    public static File multipartFileToFile(MultipartFile multipartFile) throws IOException {
        // 创建临时文件
        File file = File.createTempFile("temp", null);
        // 将 MultipartFile 的内容传输到 File
        multipartFile.transferTo(file);
        return file;
    }

    /**
     * 调整图片大小
     */
    private static BufferedImage resizeImage(BufferedImage image, int maxWidth, int maxHeight) {
        int w = image.getWidth();
        int h = image.getHeight();
        double ratio = Math.min((double) maxWidth / w, (double) maxHeight / h);
        if (ratio >= 1) return image;

        int newW = (int) (w * ratio);
        int newH = (int) (h * ratio);

        BufferedImage resized = new BufferedImage(newW, newH, BufferedImage.TYPE_INT_RGB);
        Graphics2D g = resized.createGraphics();
        g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
        g.drawImage(image, 0, 0, newW, newH, null);
        g.dispose();
        return resized;
    }

    /**
     * 增强对比度
     */
    private static BufferedImage enhanceContrast(BufferedImage image) {
        BufferedImage result = new BufferedImage(image.getWidth(), image.getHeight(), image.getType());
        for (int y = 0; y < image.getHeight(); y++) {
            for (int x = 0; x < image.getWidth(); x++) {
                Color c = new Color(image.getRGB(x, y));
                int r = Math.min(255, (int) (c.getRed() * 1.15));
                int g = Math.min(255, (int) (c.getGreen() * 1.15));
                int b = Math.min(255, (int) (c.getBlue() * 1.15));
                result.setRGB(x, y, new Color(r, g, b).getRGB());
            }
        }
        return result;
    }

    public static void main(String[] args) {
        try {
            System.out.println("OCR 程序开始执行...");

            // 1. 初始化引擎(使用 v4 模型)
            System.out.println("正在初始化 OCR 引擎 (PP-OCRv4)...");
            InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4);

            // 2. 创建优化的参数配置
            ParamConfig config = createOptimizedParamConfig();

            // 3. 图片路径
            String imagePath = "E:/aa/b.jpg";
            File imageFile = new File(imagePath);
            if (!imageFile.exists()) {
                System.err.println("图片文件不存在: " + imagePath);
                return;
            }

            // 4. 图像预处理(直接处理原图,不保存临时文件)
            System.out.println("正在进行图像预处理...");
            BufferedImage processedImage = preprocessImage(imageFile);

            // 5. 保存预处理后的图片到临时目录
            String processedImagePath = "D:/temp/ocrJava/processed_" + System.currentTimeMillis() + ".png";
            ImageIO.write(processedImage, "png", new File(processedImagePath));
            System.out.println("预处理图片已保存: " + processedImagePath);

            // 6. 执行识别
            System.out.println("开始识别图片...");
            long startTime = System.currentTimeMillis();
            OcrResult ocrResult = engine.runOcr(processedImagePath, config);
            long endTime = System.currentTimeMillis();

            // 7. 输出结果
            String text = ocrResult.getStrRes().trim();
            System.out.println("\n==================================");
            System.out.println("识别结果:");
            System.out.println(text);
            System.out.println("==================================");
            System.out.println("识别耗时: " + (endTime - startTime) + " ms");

            // 8. 输出每个文本块
            if (ocrResult.getTextBlocks() != null && !ocrResult.getTextBlocks().isEmpty()) {
                System.out.println("\n文本块详情(共" + ocrResult.getTextBlocks().size() + "块):");
                List<TextBlock> textBlocks = ocrResult.getTextBlocks();
                for (int i = 0; i < textBlocks.size(); i++) {
                    TextBlock block = textBlocks.get(i);
                    System.out.printf("  块%d: %s (置信度: %.2f)%n",
                            i + 1,
                            block.getText(),
                            block.getBoxScore()
                    );
                }
            }

            // 9. 清理临时文件
            new File(processedImagePath).delete();

        } catch (Exception e) {
            System.err.println("OCR 识别失败: " + e.getMessage());
            e.printStackTrace();
        }
    }
}