diff --git a/src/main/java/com/xly/ocr/service/OcrService.java b/src/main/java/com/xly/ocr/service/OcrService.java index 80a00bd..965fd95 100644 --- a/src/main/java/com/xly/ocr/service/OcrService.java +++ b/src/main/java/com/xly/ocr/service/OcrService.java @@ -2,21 +2,10 @@ package com.xly.ocr.service; import com.xly.ocr.util.OcrUtil; import lombok.extern.slf4j.Slf4j; -import net.sourceforge.tess4j.Tesseract; -import net.sourceforge.tess4j.TesseractException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Service; import org.springframework.web.multipart.MultipartFile; -import javax.imageio.ImageIO; -import java.awt.*; -import java.awt.image.BufferedImage; -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; import java.util.Arrays; import java.util.List; @@ -24,473 +13,64 @@ import java.util.List; @Service("ocrService") public class OcrService { - private static final Logger logger = LoggerFactory.getLogger(OcrService.class); - @Value("${ocr.tmpPath}") private String tmpPath; - - private final Tesseract tesseract; - - // 配置参数 - private static final List ALLOWED_EXTENSIONS = Arrays.asList(".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".gif"); + private static final List ALLOWED_EXTENSIONS = Arrays.asList( + ".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".gif" + ); private static final long MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB - private static final int BINARIZE_THRESHOLD = 127; - private static final int MIN_WIDTH = 800; - private static final int MIN_HEIGHT = 200; - - // 性能统计 - private static class OcrStats { - long preprocessTime = 0; - long ocrTime = 0; - String imageSize = ""; - - @Override - public String toString() { - return String.format("预处理耗时: %dms, OCR耗时: %dms, 图片尺寸: %s", - preprocessTime, ocrTime, imageSize); - } - } - - public OcrService(@Value("${tesseract.datapath}") String dataPath) { - this.tesseract = new Tesseract(); - - // 基础配置 - this.tesseract.setDatapath(dataPath); - this.tesseract.setLanguage("chi_sim+eng"); - - // 优化识别参数 - configureTesseract(); - - logger.info("Tesseract 初始化完成,语言包路径: {}, 语言: chi_sim+eng", dataPath); - } - - /** - * 配置 Tesseract 参数 - */ - private void configureTesseract() { - // 页面分割模式:3 = 自动页面分割,但没有方向检测 - this.tesseract.setPageSegMode(3); - - // OCR 引擎模式:3 = 默认,基于 LSTM 和传统引擎 - this.tesseract.setOcrEngineMode(3); - - // 提高中文识别率 - this.tesseract.setVariable("preserve_interword_spaces", "1"); - this.tesseract.setVariable("textord_force_make_prop_words", "true"); - - // 可选:设置字符白名单(根据需要启用) - // this.tesseract.setVariable("tessedit_char_whitelist", - // "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ,。!?;:\"‘’“”【】()《》"); - - // 可选:设置黑名单(排除干扰字符) - // this.tesseract.setVariable("tessedit_char_blacklist", "|\\/`~@#$%^&*()_+={}[]"); - } - - /** - * 图片预处理 - 优化的处理流程 - */ - private BufferedImage preprocessImage(BufferedImage originalImage) { - if (originalImage == null) { - return null; - } - - try { - long startTime = System.currentTimeMillis(); - - // 1. 自动调整亮度和对比度 - BufferedImage adjusted = autoAdjustBrightnessContrast(originalImage); - - // 2. 灰度化 - BufferedImage grayImage = toGray(adjusted); - - // 3. 自适应二值化(比固定阈值更好) - BufferedImage binaryImage = adaptiveBinarize(grayImage); - - // 4. 降噪处理 - BufferedImage denoisedImage = denoise(binaryImage); - - // 5. 放大图片(如果太小) - BufferedImage scaledImage = scaleImageIfNeeded(denoisedImage); - - // 6. 可选:边缘增强(提高清晰度) - BufferedImage enhancedImage = sharpen(scaledImage); - - long endTime = System.currentTimeMillis(); - logger.debug("图片预处理耗时: {}ms", endTime - startTime); - - return enhancedImage; - - } catch (Exception e) { - logger.error("图片预处理失败: {}", e.getMessage(), e); - return originalImage; - } - } - - /** - * 自动调整亮度和对比度 - */ - private BufferedImage autoAdjustBrightnessContrast(BufferedImage image) { - BufferedImage result = new BufferedImage( - image.getWidth(), image.getHeight(), image.getType()); - - // 计算亮度直方图 - int[] histogram = new int[256]; - for (int y = 0; y < image.getHeight(); y++) { - for (int x = 0; x < image.getWidth(); x++) { - int rgb = image.getRGB(x, y); - int gray = (int)((rgb >> 16 & 0xFF) * 0.299 + - (rgb >> 8 & 0xFF) * 0.587 + - (rgb & 0xFF) * 0.114); - histogram[gray]++; - } - } - - // 找到黑色和白色的阈值 - int total = image.getWidth() * image.getHeight(); - int blackThreshold = 0; - int whiteThreshold = 255; - - int sum = 0; - for (int i = 0; i < 256; i++) { - sum += histogram[i]; - if (sum > total * 0.05) { - blackThreshold = i; - break; - } - } - - sum = 0; - for (int i = 255; i >= 0; i--) { - sum += histogram[i]; - if (sum > total * 0.05) { - whiteThreshold = i; - break; - } - } - - // 应用对比度拉伸 - for (int y = 0; y < image.getHeight(); y++) { - for (int x = 0; x < image.getWidth(); x++) { - int rgb = image.getRGB(x, y); - int r = (rgb >> 16) & 0xFF; - int g = (rgb >> 8) & 0xFF; - int b = rgb & 0xFF; - - // 拉伸到 0-255 范围 - r = stretchValue(r, blackThreshold, whiteThreshold); - g = stretchValue(g, blackThreshold, whiteThreshold); - b = stretchValue(b, blackThreshold, whiteThreshold); - - result.setRGB(x, y, (r << 16) | (g << 8) | b); - } - } - - return result; - } - - private int stretchValue(int value, int black, int white) { - if (value <= black) return 0; - if (value >= white) return 255; - return (value - black) * 255 / (white - black); - } - - /** - * 灰度化 - */ - private BufferedImage toGray(BufferedImage image) { - BufferedImage result = new BufferedImage( - image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY); - Graphics g = result.getGraphics(); - g.drawImage(image, 0, 0, null); - g.dispose(); - return result; - } - - /** - * 自适应二值化 - 根据局部区域动态调整阈值 - */ - private BufferedImage adaptiveBinarize(BufferedImage image) { - BufferedImage result = new BufferedImage( - image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY); - - int blockSize = 15; - int constant = 5; - - for (int y = 0; y < image.getHeight(); y++) { - for (int x = 0; x < image.getWidth(); x++) { - // 计算局部区域的平均值 - int sum = 0; - int count = 0; - for (int ky = -blockSize/2; ky <= blockSize/2; ky++) { - for (int kx = -blockSize/2; kx <= blockSize/2; kx++) { - int px = Math.min(Math.max(x + kx, 0), image.getWidth() - 1); - int py = Math.min(Math.max(y + ky, 0), image.getHeight() - 1); - sum += new Color(image.getRGB(px, py)).getRed(); - count++; - } - } - int threshold = sum / count - constant; - - // 应用阈值 - int gray = new Color(image.getRGB(x, y)).getRed(); - int binary = gray > threshold ? 255 : 0; - result.setRGB(x, y, new Color(binary, binary, binary).getRGB()); - } - } - return result; - } /** - * 降噪 - 优化的中值滤波 + * 从 MultipartFile 中提取文字 */ - private BufferedImage denoise(BufferedImage image) { - BufferedImage result = new BufferedImage( - image.getWidth(), image.getHeight(), image.getType()); - - for (int y = 1; y < image.getHeight() - 1; y++) { - for (int x = 1; x < image.getWidth() - 1; x++) { - int[] neighbors = new int[9]; - int index = 0; - for (int ky = -1; ky <= 1; ky++) { - for (int kx = -1; kx <= 1; kx++) { - neighbors[index++] = new Color(image.getRGB(x + kx, y + ky)).getRed(); - } - } - Arrays.sort(neighbors); - int median = neighbors[4]; - result.setRGB(x, y, new Color(median, median, median).getRGB()); - } - } - - // 处理边缘 - for (int x = 0; x < image.getWidth(); x++) { - result.setRGB(x, 0, image.getRGB(x, 0)); - result.setRGB(x, image.getHeight() - 1, image.getRGB(x, image.getHeight() - 1)); - } - for (int y = 0; y < image.getHeight(); y++) { - result.setRGB(0, y, image.getRGB(0, y)); - result.setRGB(image.getWidth() - 1, y, image.getRGB(image.getWidth() - 1, y)); - } - - return result; - } - - /** - * 锐化处理 - 增强文字边缘 - */ - private BufferedImage sharpen(BufferedImage image) { - BufferedImage result = new BufferedImage( - image.getWidth(), image.getHeight(), image.getType()); - - // 拉普拉斯锐化核 - float[] sharpenKernel = { - 0, -1, 0, - -1, 5, -1, - 0, -1, 0 - }; - - for (int y = 1; y < image.getHeight() - 1; y++) { - for (int x = 1; x < image.getWidth() - 1; x++) { - int sum = 0; - int index = 0; - for (int ky = -1; ky <= 1; ky++) { - for (int kx = -1; kx <= 1; kx++) { - int gray = new Color(image.getRGB(x + kx, y + ky)).getRed(); - sum += gray * sharpenKernel[index++]; - } - } - sum = Math.min(255, Math.max(0, sum)); - result.setRGB(x, y, new Color(sum, sum, sum).getRGB()); - } - } - - return result; - } - - /** - * 放大图片(如果图片太小) - */ - private BufferedImage scaleImageIfNeeded(BufferedImage image) { - int width = image.getWidth(); - int height = image.getHeight(); - - if (width >= MIN_WIDTH && height >= MIN_HEIGHT) { - return image; - } - - double scaleX = (double) MIN_WIDTH / width; - double scaleY = (double) MIN_HEIGHT / height; - double scale = Math.max(scaleX, scaleY); - - int newWidth = (int) (width * scale); - int newHeight = (int) (height * scale); - - // 使用更好的插值算法 - BufferedImage result = new BufferedImage(newWidth, newHeight, image.getType()); - Graphics2D g2d = result.createGraphics(); - g2d.setRenderingHint(RenderingHints.KEY_INTERPOLATION, - RenderingHints.VALUE_INTERPOLATION_BICUBIC); - g2d.setRenderingHint(RenderingHints.KEY_RENDERING, - RenderingHints.VALUE_RENDER_QUALITY); - g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, - RenderingHints.VALUE_ANTIALIAS_ON); - g2d.drawImage(image, 0, 0, newWidth, newHeight, null); - g2d.dispose(); - - logger.debug("图片已放大: {}x{} -> {}x{}", width, height, newWidth, newHeight); - return result; - } - - /** - * 识别图片中的文字(增强版) - */ - public String extractText(File imageFile) { - if (imageFile == null || !imageFile.exists()) { - logger.error("图片文件不存在或为空"); - return "图片文件不存在"; + public String extractTextFromMultipartFile(MultipartFile file) { + // 1. 验证文件 + String validationError = validateFile(file); + if (validationError != null) { + return validationError; } - OcrStats stats = new OcrStats(); - + // 2. 调用 OCR 工具类识别 try { - logger.info("开始识别图片: {}, 大小: {} bytes", - imageFile.getAbsolutePath(), imageFile.length()); + String result = OcrUtil.ocrFile(file, tmpPath); - // 读取原始图片 - long readStart = System.currentTimeMillis(); - BufferedImage originalImage = ImageIO.read(imageFile); - if (originalImage == null) { - return "无法读取图片文件,请确保图片格式正确"; - } - stats.imageSize = originalImage.getWidth() + "x" + originalImage.getHeight(); - - // 图片预处理 - long preprocessStart = System.currentTimeMillis(); - BufferedImage processedImage = preprocessImage(originalImage); - stats.preprocessTime = System.currentTimeMillis() - preprocessStart; - - // 可选:保存预处理图片用于调试(生产环境可注释) - if (logger.isDebugEnabled()) { - saveDebugImage(processedImage, imageFile); - } - - // 执行 OCR - long ocrStart = System.currentTimeMillis(); - String result = tesseract.doOCR(processedImage); - stats.ocrTime = System.currentTimeMillis() - ocrStart; - - logger.info("识别完成 - {}", stats); - - // 清理识别结果 - result = cleanResult(result); - - if (result.isEmpty()) { - logger.warn("识别结果为空,可能需要调整预处理参数"); + if (result == null || result.trim().isEmpty()) { + log.warn("未识别到文字内容,文件: {}", file.getOriginalFilename()); + return "未识别到文字内容"; } + log.info("识别成功,文件: {}, 文字长度: {}", + file.getOriginalFilename(), result.length()); return result; - } catch (TesseractException e) { - logger.error("OCR识别失败: {}", e.getMessage(), e); + } catch (Exception e) { + log.error("OCR识别异常: {}", e.getMessage(), e); return "OCR识别失败: " + e.getMessage(); - } catch (IOException e) { - logger.error("读取图片失败: {}", e.getMessage(), e); - return "读取图片失败: " + e.getMessage(); - } - } - - /** - * 保存调试图片(仅用于调试) - */ - private void saveDebugImage(BufferedImage image, File originalFile) { - try { - String debugPath = originalFile.getParent() + "/debug_" + originalFile.getName(); - File debugFile = new File(debugPath); - ImageIO.write(image, "png", debugFile); - logger.debug("预处理图片已保存: {}", debugPath); - } catch (IOException e) { - logger.debug("保存调试图片失败: {}", e.getMessage()); - } - } - - /** - * 清理识别结果 - */ - private String cleanResult(String result) { - if (result == null || result.isEmpty()) { - return ""; - } - - // 去除首尾空白 - result = result.trim(); - - // 规范化换行符 - result = result.replaceAll("\\r\\n", "\n") - .replaceAll("\\r", "\n"); - - // 合并多个空行 - result = result.replaceAll("\n{3,}", "\n\n"); - - // 去除行首行尾空格 - String[] lines = result.split("\n"); - StringBuilder cleaned = new StringBuilder(); - for (String line : lines) { - cleaned.append(line.trim()).append("\n"); } - - return cleaned.toString().trim(); } /** - * 封装方法,接收上传的 MultipartFile + * 验证文件 */ - public String extractTextFromMultipartFile(MultipartFile file) { + private String validateFile(MultipartFile file) { if (file == null || file.isEmpty()) { - logger.warn("上传的文件为空"); + log.warn("上传的文件为空"); return "上传的文件为空"; } - // 验证文件大小 if (file.getSize() > MAX_FILE_SIZE) { - logger.warn("文件过大: {} bytes, 超过限制: {} bytes", - file.getSize(), MAX_FILE_SIZE); + log.warn("文件过大: {} bytes", file.getSize()); return String.format("文件过大,最大支持 %dMB", MAX_FILE_SIZE / 1024 / 1024); } - // 验证文件格式 String originalFilename = file.getOriginalFilename(); if (originalFilename != null && !isAllowedImage(originalFilename)) { - logger.warn("不支持的文件格式: {}", originalFilename); + log.warn("不支持的文件格式: {}", originalFilename); return "不支持的文件格式,仅支持: " + String.join(", ", ALLOWED_EXTENSIONS); } - String sText = OcrUtil.ocrFile(file,tmpPath); - return sText; - } - - /** - * 清理临时文件 - */ - private void cleanupTempFile(Path tempFile) { - if (tempFile != null) { - try { - Files.deleteIfExists(tempFile); - logger.debug("临时文件已删除: {}", tempFile); - } catch (IOException e) { - logger.warn("删除临时文件失败: {}", tempFile, e); - // 注册JVM退出时删除 - tempFile.toFile().deleteOnExit(); - } - } - } - /** - * 批量识别(用于多张图片) - */ - public List batchExtractText(List files) { - return files.stream() - .map(this::extractTextFromMultipartFile) - .collect(java.util.stream.Collectors.toList()); + return null; } /** @@ -504,14 +84,4 @@ public class OcrService { return ALLOWED_EXTENSIONS.stream() .anyMatch(lowerFilename::endsWith); } - - /** - * 获取文件扩展名 - */ - private String getFileExtension(String filename) { - if (filename == null || !filename.contains(".")) { - return ".jpg"; - } - return filename.substring(filename.lastIndexOf(".")); - } } \ No newline at end of file diff --git a/src/main/java/com/xly/ocr/util/OcrUtil.java b/src/main/java/com/xly/ocr/util/OcrUtil.java index fb231a6..b351b4a 100644 --- a/src/main/java/com/xly/ocr/util/OcrUtil.java +++ b/src/main/java/com/xly/ocr/util/OcrUtil.java @@ -16,107 +16,262 @@ import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; import java.util.List; +import java.util.UUID; @Slf4j public class OcrUtil { - static { + // 引擎实例(单例,避免重复初始化) + private static volatile InferenceEngine engine; + private static final Object LOCK = new Object(); + + /** + * 获取 OCR 引擎实例(懒加载单例) + */ + private static InferenceEngine getEngine() { + if (engine == null) { + synchronized (LOCK) { + if (engine == null) { + try { + log.info("初始化 OCR 引擎 (PP-OCRv4)..."); + engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4); + log.info("OCR 引擎初始化成功"); + } catch (Exception e) { + log.error("OCR 引擎初始化失败: {}", e.getMessage(), e); + throw new RuntimeException("OCR 引擎初始化失败", e); + } + } + } + } + return engine; + } + + /** + * 识别图片中的文字 + * @param imageFile 上传的图片文件 + * @param tempDir 临时目录路径 + * @return 识别出的文字 + */ + public static String ocrFile(MultipartFile imageFile, String tempDir) { + File tempImageFile = null; + String processedImagePath = null; + try { - String customTempDir = "D:/temp/ocrJava"; - File tempDir = new File(customTempDir); - if (!tempDir.exists()) { - tempDir.mkdirs(); + log.info("开始 OCR 识别,文件: {}", imageFile.getOriginalFilename()); + + // 1. 验证输入 + if (imageFile == null || imageFile.isEmpty()) { + log.warn("图片文件为空"); + return StrUtil.EMPTY; + } + + // 2. 创建临时目录 + ensureTempDirExists(tempDir); + + // 3. MultipartFile 转 File + tempImageFile = multipartFileToFile(imageFile, tempDir); + if (tempImageFile == null || !tempImageFile.exists()) { + log.error("转换临时文件失败"); + return StrUtil.EMPTY; + } + + // 4. 图像预处理 + BufferedImage processedImage = preprocessImage(tempImageFile); + if (processedImage == null) { + log.error("图像预处理失败"); + return StrUtil.EMPTY; + } + + // 5. 保存预处理图片 + processedImagePath = saveProcessedImage(processedImage, tempDir); + if (processedImagePath == null) { + log.error("保存预处理图片失败"); + return StrUtil.EMPTY; + } + + // 6. 执行 OCR 识别 + String text = performOcr(processedImagePath); + + // 7. 记录识别结果 + if (StrUtil.isNotBlank(text)) { + log.info("OCR 识别成功,文字长度: {} 字符", text.length()); + log.debug("识别结果: {}", text); + } else { + log.warn("OCR 识别结果为空"); } - System.setProperty("java.io.tmpdir", customTempDir); - System.setProperty("TMP", customTempDir); - System.setProperty("TEMP", customTempDir); - System.out.println("=================================="); - System.out.println("临时目录: " + System.getProperty("java.io.tmpdir")); - System.out.println("=================================="); + return text; } catch (Exception e) { - System.err.println("设置临时目录失败: " + e.getMessage()); + log.error("OCR 识别失败: {}", e.getMessage(), e); + return StrUtil.EMPTY; + } finally { + // 清理临时文件 + cleanupTempFiles(tempImageFile, processedImagePath); } } - public static String ocrFile(MultipartFile imageFile, String sTmpPath){ - String processedImagePath = StrUtil.EMPTY; + /** + * 确保临时目录存在 + */ + private static void ensureTempDirExists(String tempDir) { + if (StrUtil.isBlank(tempDir)) { + tempDir = System.getProperty("java.io.tmpdir"); + } + + File dir = new File(tempDir); + if (!dir.exists()) { + boolean created = dir.mkdirs(); + if (created) { + log.debug("创建临时目录: {}", tempDir); + } else { + log.warn("无法创建临时目录: {}", tempDir); + } + } + } + + /** + * MultipartFile 转 File + * @param multipartFile 上传文件 + * @param tempDir 临时目录 + * @return File 对象 + */ + public static File multipartFileToFile(MultipartFile multipartFile, String tempDir) throws IOException { + if (multipartFile == null || multipartFile.isEmpty()) { + return null; + } + + // 获取文件扩展名 + String originalFilename = multipartFile.getOriginalFilename(); + String extension = getFileExtension(originalFilename); + + // 生成唯一文件名 + String uniqueFilename = UUID.randomUUID().toString() + extension; + String filePath = tempDir + File.separator + uniqueFilename; + + File file = new File(filePath); + multipartFile.transferTo(file); + + log.debug("创建临时文件: {}", filePath); + return file; + } + + /** + * 执行 OCR 识别 + */ + private static String performOcr(String imagePath) { try { - log.info("OCR 程序开始执行..."); - // 1. 初始化引擎(使用 v4 模型) - log.info("正在初始化 OCR 引擎 (PP-OCRv4)..."); - InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4); - // 2. 创建优化的参数配置 + // 获取引擎实例 + InferenceEngine engine = getEngine(); + + // 创建参数配置 ParamConfig config = createOptimizedParamConfig(); - // 4. 图像预处理(直接处理原图,不保存临时文件) - System.out.println("正在进行图像预处理..."); - File file = multipartFileToFile(imageFile); - BufferedImage processedImage = preprocessImage(file); - // 5. 保存预处理后的图片到临时目录 - if(!FileUtil.exist(sTmpPath)){ - FileUtil.mkdir(sTmpPath); - } - processedImagePath = sTmpPath+"/processed_" + System.currentTimeMillis() + ".png"; - ImageIO.write(processedImage, "png", new File(processedImagePath)); - log.info("预处理图片已保存: " + processedImagePath); - // 6. 执行识别 - log.info("开始识别图片..."); + + // 执行识别 long startTime = System.currentTimeMillis(); - OcrResult ocrResult = engine.runOcr(processedImagePath, config); + OcrResult ocrResult = engine.runOcr(imagePath, config); long endTime = System.currentTimeMillis(); - // 7. 输出结果 - String text = ocrResult.getStrRes().trim(); - log.info("\n=================================="); - log.info("识别结果:"); - log.info(text); - log.info("=================================="); - log.info("识别耗时: " + (endTime - startTime) + " ms"); - // 8. 输出每个文本块 -// if (ocrResult.getTextBlocks() != null && !ocrResult.getTextBlocks().isEmpty()) { -// System.out.println("\n文本块详情(共" + ocrResult.getTextBlocks().size() + "块):"); -// List textBlocks = ocrResult.getTextBlocks(); -// for (int i = 0; i < textBlocks.size(); i++) { -// TextBlock block = textBlocks.get(i); -// System.out.printf(" 块%d: %s (置信度: %.2f)%n", -// i + 1, -// block.getText(), -// block.getBoxScore() -// ); -// } -// } - return text; + + log.info("OCR 识别耗时: {} ms", (endTime - startTime)); + + // 输出文本块详情(DEBUG 级别) + if (log.isDebugEnabled() && ocrResult.getTextBlocks() != null) { + List textBlocks = ocrResult.getTextBlocks(); + log.debug("识别到 {} 个文本块", textBlocks.size()); + for (int i = 0; i < textBlocks.size(); i++) { + TextBlock block = textBlocks.get(i); + log.debug(" 块{}: {} (置信度: {})", + i + 1, block.getText(), block.getBoxScore()); + } + } + + return ocrResult.getStrRes().trim(); } catch (Exception e) { - System.err.println("OCR 识别失败: " + e.getMessage()); - e.printStackTrace(); - }finally { - // 9. 清理临时文件 - FileUtil.del(processedImagePath); + log.error("执行 OCR 识别失败: {}", e.getMessage(), e); + return StrUtil.EMPTY; + } + } + + /** + * 保存预处理后的图片 + */ + private static String saveProcessedImage(BufferedImage image, String tempDir) throws IOException { + if (image == null) { + return null; } - return StrUtil.EMPTY; + + String filename = "processed_" + System.currentTimeMillis() + "_" + UUID.randomUUID().toString() + ".png"; + String filePath = tempDir + File.separator + filename; + + File outputFile = new File(filePath); + ImageIO.write(image, "png", outputFile); + + log.debug("保存预处理图片: {}", filePath); + return filePath; } + /** + * 清理临时文件 + */ + private static void cleanupTempFiles(File tempImageFile, String processedImagePath) { + // 清理原始临时文件 + if (tempImageFile != null && tempImageFile.exists()) { + boolean deleted = tempImageFile.delete(); + if (deleted) { + log.debug("删除临时文件: {}", tempImageFile.getPath()); + } else { + log.warn("删除临时文件失败: {}", tempImageFile.getPath()); + tempImageFile.deleteOnExit(); + } + } + + // 清理预处理图片 + if (StrUtil.isNotBlank(processedImagePath)) { + File processedFile = new File(processedImagePath); + if (processedFile.exists()) { + boolean deleted = processedFile.delete(); + if (deleted) { + log.debug("删除预处理图片: {}", processedImagePath); + } else { + log.warn("删除预处理图片失败: {}", processedImagePath); + processedFile.deleteOnExit(); + } + } + } + } /** - * 优化的参数配置 + * 创建优化的参数配置 */ private static ParamConfig createOptimizedParamConfig() { ParamConfig config = new ParamConfig(); + // 文本区域扩展 config.setPadding(50); + + // 最大边长限制(0 表示不限制) config.setMaxSideLen(0); + + // 文本块置信度阈值 config.setBoxScoreThresh(0.4f); config.setBoxThresh(0.25f); + + // 文本区域扩展比例 config.setUnClipRatio(1.8f); + + // 角度检测 config.setDoAngle(true); config.setMostAngle(true); + log.debug("OCR 参数配置: padding={}, unClipRatio={}", + config.getPadding(), config.getUnClipRatio()); + return config; } /** - * 图像预处理 - 直接返回处理后的 BufferedImage + * 图像预处理 */ private static BufferedImage preprocessImage(File imageFile) throws IOException { BufferedImage original = ImageIO.read(imageFile); @@ -124,36 +279,21 @@ public class OcrUtil { throw new IOException("无法读取图片: " + imageFile.getPath()); } - System.out.println("原始尺寸: " + original.getWidth() + "x" + original.getHeight()); + log.debug("原始图片尺寸: {}x{}", original.getWidth(), original.getHeight()); BufferedImage processed = original; // 1. 如果图片太大,缩小尺寸 if (processed.getWidth() > 2000 || processed.getHeight() > 2000) { processed = resizeImage(processed, 1600, 1600); + log.debug("缩小图片尺寸: {}x{}", processed.getWidth(), processed.getHeight()); } // 2. 增强对比度 processed = enhanceContrast(processed); - System.out.println("处理后尺寸: " + processed.getWidth() + "x" + processed.getHeight()); - return processed; } - /*** - * @Author 钱豹 - * @Date 11:01 2026/4/1 - * @Param [multipartFile] - * @return java.io.File - * @Description 图片对象转换 - **/ - public static File multipartFileToFile(MultipartFile multipartFile) throws IOException { - // 创建临时文件 - File file = File.createTempFile("temp", null); - // 将 MultipartFile 的内容传输到 File - multipartFile.transferTo(file); - return file; - } /** * 调整图片大小 @@ -161,8 +301,12 @@ public class OcrUtil { private static BufferedImage resizeImage(BufferedImage image, int maxWidth, int maxHeight) { int w = image.getWidth(); int h = image.getHeight(); + + // 计算缩放比例 double ratio = Math.min((double) maxWidth / w, (double) maxHeight / h); - if (ratio >= 1) return image; + if (ratio >= 1.0) { + return image; + } int newW = (int) (w * ratio); int newH = (int) (h * ratio); @@ -170,8 +314,10 @@ public class OcrUtil { BufferedImage resized = new BufferedImage(newW, newH, BufferedImage.TYPE_INT_RGB); Graphics2D g = resized.createGraphics(); g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC); + g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY); g.drawImage(image, 0, 0, newW, newH, null); g.dispose(); + return resized; } @@ -180,6 +326,7 @@ public class OcrUtil { */ private static BufferedImage enhanceContrast(BufferedImage image) { BufferedImage result = new BufferedImage(image.getWidth(), image.getHeight(), image.getType()); + for (int y = 0; y < image.getHeight(); y++) { for (int x = 0; x < image.getWidth(); x++) { Color c = new Color(image.getRGB(x, y)); @@ -189,21 +336,32 @@ public class OcrUtil { result.setRGB(x, y, new Color(r, g, b).getRGB()); } } + return result; } - public static void main(String[] args) { - try { - System.out.println("OCR 程序开始执行..."); - - // 1. 初始化引擎(使用 v4 模型) - System.out.println("正在初始化 OCR 引擎 (PP-OCRv4)..."); - InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4); + /** + * 获取文件扩展名 + */ + private static String getFileExtension(String filename) { + if (StrUtil.isBlank(filename)) { + return ".jpg"; + } + int lastDotIndex = filename.lastIndexOf("."); + if (lastDotIndex == -1) { + return ".jpg"; + } + return filename.substring(lastDotIndex); + } - // 2. 创建优化的参数配置 - ParamConfig config = createOptimizedParamConfig(); + /** + * 测试方法 + */ + public static void main(String[] args) { + String tempDir = "D:/temp/ocrJava"; - // 3. 图片路径 + // 测试识别 + try { String imagePath = "E:/aa/b.jpg"; File imageFile = new File(imagePath); if (!imageFile.exists()) { @@ -211,48 +369,17 @@ public class OcrUtil { return; } - // 4. 图像预处理(直接处理原图,不保存临时文件) - System.out.println("正在进行图像预处理..."); + // 手动测试(实际使用中应该通过 MultipartFile) BufferedImage processedImage = preprocessImage(imageFile); + String processedPath = saveProcessedImage(processedImage, tempDir); + String result = performOcr(processedPath); - // 5. 保存预处理后的图片到临时目录 - String processedImagePath = "D:/temp/ocrJava/processed_" + System.currentTimeMillis() + ".png"; - ImageIO.write(processedImage, "png", new File(processedImagePath)); - System.out.println("预处理图片已保存: " + processedImagePath); - - // 6. 执行识别 - System.out.println("开始识别图片..."); - long startTime = System.currentTimeMillis(); - OcrResult ocrResult = engine.runOcr(processedImagePath, config); - long endTime = System.currentTimeMillis(); - - // 7. 输出结果 - String text = ocrResult.getStrRes().trim(); - System.out.println("\n=================================="); - System.out.println("识别结果:"); - System.out.println(text); - System.out.println("=================================="); - System.out.println("识别耗时: " + (endTime - startTime) + " ms"); - - // 8. 输出每个文本块 - if (ocrResult.getTextBlocks() != null && !ocrResult.getTextBlocks().isEmpty()) { - System.out.println("\n文本块详情(共" + ocrResult.getTextBlocks().size() + "块):"); - List textBlocks = ocrResult.getTextBlocks(); - for (int i = 0; i < textBlocks.size(); i++) { - TextBlock block = textBlocks.get(i); - System.out.printf(" 块%d: %s (置信度: %.2f)%n", - i + 1, - block.getText(), - block.getBoxScore() - ); - } - } + System.out.println("识别结果: " + result); - // 9. 清理临时文件 - new File(processedImagePath).delete(); + // 清理 + new File(processedPath).delete(); } catch (Exception e) { - System.err.println("OCR 识别失败: " + e.getMessage()); e.printStackTrace(); } } diff --git a/src/main/java/com/xly/ocr/web/OcrController.java b/src/main/java/com/xly/ocr/web/OcrController.java index f5c1117..5a33ee9 100644 --- a/src/main/java/com/xly/ocr/web/OcrController.java +++ b/src/main/java/com/xly/ocr/web/OcrController.java @@ -28,10 +28,10 @@ public class OcrController { return ResponseEntity.ok(dto); } - @PostMapping("/batch") - public ResponseEntity> batchExtract( - @RequestParam("files") List files) { - List results = ocrService.batchExtractText(files); - return ResponseEntity.ok(results); - } +// @PostMapping("/batch") +// public ResponseEntity> batchExtract( +// @RequestParam("files") List files) { +// List results = ocrService.batchExtractText(files); +// return ResponseEntity.ok(results); +// } } \ No newline at end of file