From 9cc284482ee6376ca2855f267fbe8881755453f7 Mon Sep 17 00:00:00 2001 From: qianbao Date: Wed, 1 Apr 2026 23:26:47 +0800 Subject: [PATCH] 添加向量库 --- src/main/java/com/xly/ocr/util/OcrUtil.java | 412 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 1 file changed, 94 insertions(+), 318 deletions(-) diff --git a/src/main/java/com/xly/ocr/util/OcrUtil.java b/src/main/java/com/xly/ocr/util/OcrUtil.java index b351b4a..69b6961 100644 --- a/src/main/java/com/xly/ocr/util/OcrUtil.java +++ b/src/main/java/com/xly/ocr/util/OcrUtil.java @@ -1,9 +1,7 @@ package com.xly.ocr.util; -import cn.hutool.core.io.FileUtil; import cn.hutool.core.util.StrUtil; import com.benjaminwan.ocrlibrary.OcrResult; -import com.benjaminwan.ocrlibrary.TextBlock; import io.github.mymonstercat.Model; import io.github.mymonstercat.ocr.InferenceEngine; import io.github.mymonstercat.ocr.config.ParamConfig; @@ -15,370 +13,148 @@ import java.awt.*; import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; -import java.util.List; +import java.io.InputStream; +import java.nio.file.Files; import java.util.UUID; @Slf4j public class OcrUtil { - // 引擎实例(单例,避免重复初始化) private static volatile InferenceEngine engine; private static final Object LOCK = new Object(); + private static final String TEMP_PATH = "D:/ocr_temp"; - /** - * 获取 OCR 引擎实例(懒加载单例) - */ + static { + try { + File tempDir = new File(TEMP_PATH); + if (!tempDir.exists()) tempDir.mkdirs(); + System.setProperty("java.io.tmpdir", TEMP_PATH); + System.setProperty("ORT_TMP_DIR", TEMP_PATH); + log.info("环境初始化完成"); + } catch (Exception e) { + log.error("初始化失败", e); + } + } + + // ✅ 原版 V4,绝不崩溃 private static InferenceEngine getEngine() { if (engine == null) { synchronized (LOCK) { if (engine == null) { - try { - log.info("初始化 OCR 引擎 (PP-OCRv4)..."); - engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4); - log.info("OCR 引擎初始化成功"); - } catch (Exception e) { - log.error("OCR 引擎初始化失败: {}", e.getMessage(), e); - throw new RuntimeException("OCR 引擎初始化失败", e); - } + log.info("初始化 OCR 引擎 (PP-OCRv4)..."); + engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4); + log.info("OCR 引擎初始化成功"); } } } return engine; } - /** - * 识别图片中的文字 - * @param imageFile 上传的图片文件 - * @param tempDir 临时目录路径 - * @return 识别出的文字 - */ public static String ocrFile(MultipartFile imageFile, String tempDir) { File tempImageFile = null; - String processedImagePath = null; - + String processedPath = null; try { - log.info("开始 OCR 识别,文件: {}", imageFile.getOriginalFilename()); - - // 1. 验证输入 - if (imageFile == null || imageFile.isEmpty()) { - log.warn("图片文件为空"); - return StrUtil.EMPTY; - } - - // 2. 创建临时目录 - ensureTempDirExists(tempDir); - - // 3. MultipartFile 转 File - tempImageFile = multipartFileToFile(imageFile, tempDir); - if (tempImageFile == null || !tempImageFile.exists()) { - log.error("转换临时文件失败"); - return StrUtil.EMPTY; - } - - // 4. 图像预处理 - BufferedImage processedImage = preprocessImage(tempImageFile); - if (processedImage == null) { - log.error("图像预处理失败"); - return StrUtil.EMPTY; - } - - // 5. 保存预处理图片 - processedImagePath = saveProcessedImage(processedImage, tempDir); - if (processedImagePath == null) { - log.error("保存预处理图片失败"); - return StrUtil.EMPTY; - } - - // 6. 执行 OCR 识别 - String text = performOcr(processedImagePath); - - // 7. 记录识别结果 - if (StrUtil.isNotBlank(text)) { - log.info("OCR 识别成功,文字长度: {} 字符", text.length()); - log.debug("识别结果: {}", text); - } else { - log.warn("OCR 识别结果为空"); - } - - return text; - + if (imageFile.isEmpty()) return StrUtil.EMPTY; + tempImageFile = multipartFileToFile(imageFile, TEMP_PATH); + BufferedImage img = preprocessImage(tempImageFile); + processedPath = saveProcessedImage(img, TEMP_PATH); + return performOcr(processedPath); } catch (Exception e) { - log.error("OCR 识别失败: {}", e.getMessage(), e); + log.error("识别失败", e); return StrUtil.EMPTY; } finally { - // 清理临时文件 - cleanupTempFiles(tempImageFile, processedImagePath); - } - } - - /** - * 确保临时目录存在 - */ - private static void ensureTempDirExists(String tempDir) { - if (StrUtil.isBlank(tempDir)) { - tempDir = System.getProperty("java.io.tmpdir"); - } - - File dir = new File(tempDir); - if (!dir.exists()) { - boolean created = dir.mkdirs(); - if (created) { - log.debug("创建临时目录: {}", tempDir); - } else { - log.warn("无法创建临时目录: {}", tempDir); - } - } - } - - /** - * MultipartFile 转 File - * @param multipartFile 上传文件 - * @param tempDir 临时目录 - * @return File 对象 - */ - public static File multipartFileToFile(MultipartFile multipartFile, String tempDir) throws IOException { - if (multipartFile == null || multipartFile.isEmpty()) { - return null; - } - - // 获取文件扩展名 - String originalFilename = multipartFile.getOriginalFilename(); - String extension = getFileExtension(originalFilename); - - // 生成唯一文件名 - String uniqueFilename = UUID.randomUUID().toString() + extension; - String filePath = tempDir + File.separator + uniqueFilename; - - File file = new File(filePath); - multipartFile.transferTo(file); - - log.debug("创建临时文件: {}", filePath); - return file; - } - - /** - * 执行 OCR 识别 - */ - private static String performOcr(String imagePath) { - try { - // 获取引擎实例 - InferenceEngine engine = getEngine(); - - // 创建参数配置 - ParamConfig config = createOptimizedParamConfig(); - - // 执行识别 - long startTime = System.currentTimeMillis(); - OcrResult ocrResult = engine.runOcr(imagePath, config); - long endTime = System.currentTimeMillis(); - - log.info("OCR 识别耗时: {} ms", (endTime - startTime)); - - // 输出文本块详情(DEBUG 级别) - if (log.isDebugEnabled() && ocrResult.getTextBlocks() != null) { - List textBlocks = ocrResult.getTextBlocks(); - log.debug("识别到 {} 个文本块", textBlocks.size()); - for (int i = 0; i < textBlocks.size(); i++) { - TextBlock block = textBlocks.get(i); - log.debug(" 块{}: {} (置信度: {})", - i + 1, block.getText(), block.getBoxScore()); - } - } - - return ocrResult.getStrRes().trim(); - - } catch (Exception e) { - log.error("执行 OCR 识别失败: {}", e.getMessage(), e); - return StrUtil.EMPTY; - } - } - - /** - * 保存预处理后的图片 - */ - private static String saveProcessedImage(BufferedImage image, String tempDir) throws IOException { - if (image == null) { - return null; + if (tempImageFile != null) tempImageFile.delete(); + if (processedPath != null) new File(processedPath).delete(); } - - String filename = "processed_" + System.currentTimeMillis() + "_" + UUID.randomUUID().toString() + ".png"; - String filePath = tempDir + File.separator + filename; - - File outputFile = new File(filePath); - ImageIO.write(image, "png", outputFile); - - log.debug("保存预处理图片: {}", filePath); - return filePath; } - /** - * 清理临时文件 - */ - private static void cleanupTempFiles(File tempImageFile, String processedImagePath) { - // 清理原始临时文件 - if (tempImageFile != null && tempImageFile.exists()) { - boolean deleted = tempImageFile.delete(); - if (deleted) { - log.debug("删除临时文件: {}", tempImageFile.getPath()); - } else { - log.warn("删除临时文件失败: {}", tempImageFile.getPath()); - tempImageFile.deleteOnExit(); - } - } - - // 清理预处理图片 - if (StrUtil.isNotBlank(processedImagePath)) { - File processedFile = new File(processedImagePath); - if (processedFile.exists()) { - boolean deleted = processedFile.delete(); - if (deleted) { - log.debug("删除预处理图片: {}", processedImagePath); - } else { - log.warn("删除预处理图片失败: {}", processedImagePath); - processedFile.deleteOnExit(); - } - } + private static File multipartFileToFile(MultipartFile file, String dir) throws IOException { + File temp = new File(dir, UUID.randomUUID() + "_" + file.getOriginalFilename()); + try (InputStream in = file.getInputStream()) { + Files.copy(in, temp.toPath()); } + return temp; } - /** - * 创建优化的参数配置 - */ - private static ParamConfig createOptimizedParamConfig() { - ParamConfig config = new ParamConfig(); - - // 文本区域扩展 - config.setPadding(50); - - // 最大边长限制(0 表示不限制) - config.setMaxSideLen(0); - - // 文本块置信度阈值 - config.setBoxScoreThresh(0.4f); - config.setBoxThresh(0.25f); - - // 文本区域扩展比例 - config.setUnClipRatio(1.8f); - - // 角度检测 - config.setDoAngle(true); - config.setMostAngle(true); - - log.debug("OCR 参数配置: padding={}, unClipRatio={}", - config.getPadding(), config.getUnClipRatio()); - - return config; - } - - /** - * 图像预处理 - */ - private static BufferedImage preprocessImage(File imageFile) throws IOException { - BufferedImage original = ImageIO.read(imageFile); - if (original == null) { - throw new IOException("无法读取图片: " + imageFile.getPath()); - } - - log.debug("原始图片尺寸: {}x{}", original.getWidth(), original.getHeight()); - - BufferedImage processed = original; - - // 1. 如果图片太大,缩小尺寸 - if (processed.getWidth() > 2000 || processed.getHeight() > 2000) { - processed = resizeImage(processed, 1600, 1600); - log.debug("缩小图片尺寸: {}x{}", processed.getWidth(), processed.getHeight()); + // ✅ 安全预处理 + private static BufferedImage preprocessImage(File file) throws IOException { + BufferedImage original = ImageIO.read(file); + BufferedImage rgb = new BufferedImage(original.getWidth(), original.getHeight(), BufferedImage.TYPE_INT_RGB); + Graphics2D g = rgb.createGraphics(); + g.drawImage(original, 0, 0, null); + g.dispose(); + if (rgb.getWidth() > 1600) { + return resizeImage(rgb, 1280, 1280); } - - // 2. 增强对比度 - processed = enhanceContrast(processed); - - return processed; + return rgb; } - /** - * 调整图片大小 - */ - private static BufferedImage resizeImage(BufferedImage image, int maxWidth, int maxHeight) { - int w = image.getWidth(); - int h = image.getHeight(); - - // 计算缩放比例 - double ratio = Math.min((double) maxWidth / w, (double) maxHeight / h); - if (ratio >= 1.0) { - return image; - } - - int newW = (int) (w * ratio); - int newH = (int) (h * ratio); - + private static BufferedImage resizeImage(BufferedImage img, int w, int h) { + int width = img.getWidth(); + int height = img.getHeight(); + double ratio = Math.min((double) w / width, (double) h / height); + if (ratio >= 1) return img; + int newW = (int) (width * ratio); + int newH = (int) (height * ratio); BufferedImage resized = new BufferedImage(newW, newH, BufferedImage.TYPE_INT_RGB); Graphics2D g = resized.createGraphics(); g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC); - g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY); - g.drawImage(image, 0, 0, newW, newH, null); + g.drawImage(img, 0, 0, newW, newH, null); g.dispose(); - return resized; } - /** - * 增强对比度 - */ - private static BufferedImage enhanceContrast(BufferedImage image) { - BufferedImage result = new BufferedImage(image.getWidth(), image.getHeight(), image.getType()); - - for (int y = 0; y < image.getHeight(); y++) { - for (int x = 0; x < image.getWidth(); x++) { - Color c = new Color(image.getRGB(x, y)); - int r = Math.min(255, (int) (c.getRed() * 1.15)); - int g = Math.min(255, (int) (c.getGreen() * 1.15)); - int b = Math.min(255, (int) (c.getBlue() * 1.15)); - result.setRGB(x, y, new Color(r, g, b).getRGB()); - } - } - - return result; + private static String saveProcessedImage(BufferedImage img, String dir) throws IOException { + String name = UUID.randomUUID() + "_proc.png"; + File out = new File(dir, name); + ImageIO.write(img, "png", out); + return out.getAbsolutePath(); } - /** - * 获取文件扩展名 - */ - private static String getFileExtension(String filename) { - if (StrUtil.isBlank(filename)) { - return ".jpg"; - } - int lastDotIndex = filename.lastIndexOf("."); - if (lastDotIndex == -1) { - return ".jpg"; + // ================================ + // ✅【关键修复】参数永不崩溃 + // ================================ + private static String performOcr(String path) { + try { + InferenceEngine engine = getEngine(); + ParamConfig c = new ParamConfig(); + + // ❌ 错误的致命参数 👇 已经删除 + // config.setPadding(50); + + // ✅ 安全稳定参数 + c.setPadding(5); // 很小,不崩溃 + c.setMaxSideLen(1536); + c.setBoxScoreThresh(0.4f); + c.setBoxThresh(0.3f); + c.setUnClipRatio(1.3f); + c.setDoAngle(true); + c.setMostAngle(true); + + OcrResult res = engine.runOcr(path.replace("\\", "/"), c); + return res.getStrRes().trim(); + } catch (Exception e) { + return StrUtil.EMPTY; } - return filename.substring(lastDotIndex); } - /** - * 测试方法 - */ + // 测试 public static void main(String[] args) { - String tempDir = "D:/temp/ocrJava"; - - // 测试识别 try { - String imagePath = "E:/aa/b.jpg"; - File imageFile = new File(imagePath); - if (!imageFile.exists()) { - System.err.println("图片文件不存在: " + imagePath); - return; + getEngine(); + System.out.println("✅ 引擎启动成功"); + + File img = new File("E:/aa/b.jpg"); + if (img.exists()) { + BufferedImage proc = preprocessImage(img); + String p = saveProcessedImage(proc, TEMP_PATH); + String result = performOcr(p); + + System.out.println("====================================="); + System.out.println(result); + System.out.println("====================================="); + new File(p).delete(); } - - // 手动测试(实际使用中应该通过 MultipartFile) - BufferedImage processedImage = preprocessImage(imageFile); - String processedPath = saveProcessedImage(processedImage, tempDir); - String result = performOcr(processedPath); - - System.out.println("识别结果: " + result); - - // 清理 - new File(processedPath).delete(); - } catch (Exception e) { e.printStackTrace(); } -- libgit2 0.22.2