Commit b9b48f2e57e88629fbd1378bb289647d7d62be92
1 parent
e24472f7
添加向量库
Showing
19 changed files
with
1545 additions
and
230 deletions
pom.xml
| ... | ... | @@ -106,13 +106,41 @@ |
| 106 | 106 | <artifactId>spring-boot-starter-webflux</artifactId> |
| 107 | 107 | </dependency> |
| 108 | 108 | |
| 109 | + <!-- Tess4J OCR --> | |
| 109 | 110 | <dependency> |
| 110 | 111 | <groupId>net.sourceforge.tess4j</groupId> |
| 111 | 112 | <artifactId>tess4j</artifactId> |
| 112 | 113 | <version>${tess4j.version}</version> |
| 113 | - <scope>compile</scope> | |
| 114 | 114 | </dependency> |
| 115 | 115 | |
| 116 | + <!-- RapidOCR Java (io.github.mymonstercat): community OCR wrapper built on PaddleOCR models, not the official PaddleOCR SDK --> | |
| 117 | + <dependency> | |
| 118 | + <groupId>io.github.mymonstercat</groupId> | |
| 119 | + <artifactId>rapidocr</artifactId> | |
| 120 | + <version>0.0.7</version> | |
| 121 | + </dependency> | |
| 122 | + | |
| 123 | + <dependency> | |
| 124 | + <groupId>io.github.mymonstercat</groupId> | |
| 125 | + <artifactId>rapidocr-onnx-platform</artifactId> | |
| 126 | + <version>0.0.7</version> | |
| 127 | + </dependency> | |
| 128 | + | |
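| 129 | + <!-- ONNX Runtime: the explicit dependency below is commented out; presumably supplied transitively by rapidocr-onnx-platform (verify) --> | |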
| 130 | +<!-- <dependency>--> | |
| 131 | +<!-- <groupId>com.microsoft.onnxruntime</groupId>--> | |
| 132 | +<!-- <artifactId>onnxruntime</artifactId>--> | |
| 133 | +<!-- <version>1.17.0</version>--> | |
| 134 | +<!-- </dependency>--> | |
| 135 | + | |
| 136 | + <!-- 图片处理 --> | |
| 137 | + <dependency> | |
| 138 | + <groupId>commons-io</groupId> | |
| 139 | + <artifactId>commons-io</artifactId> | |
| 140 | + <version>2.15.1</version> | |
| 141 | + </dependency> | |
| 142 | + | |
| 143 | + | |
| 116 | 144 | <dependency> |
| 117 | 145 | <groupId>org.springframework.boot</groupId> |
| 118 | 146 | <artifactId>spring-boot-starter-thymeleaf</artifactId> |
| ... | ... | @@ -335,12 +363,7 @@ |
| 335 | 363 | <version>${langchain4j.version}</version> |
| 336 | 364 | </dependency> |
| 337 | 365 | |
| 338 | - <!-- ONNX Runtime(必需) --> | |
| 339 | - <dependency> | |
| 340 | - <groupId>com.microsoft.onnxruntime</groupId> | |
| 341 | - <artifactId>onnxruntime</artifactId> | |
| 342 | - <version>1.17.0</version> | |
| 343 | - </dependency> | |
| 366 | + | |
| 344 | 367 | |
| 345 | 368 | |
| 346 | 369 | <!-- 或者使用 Apache Tika 直接 --> | ... | ... |
src/main/java/com/xly/agent/ErpAiAgent.java
| ... | ... | @@ -11,14 +11,14 @@ import dev.langchain4j.service.V; |
| 11 | 11 | */ |
| 12 | 12 | public interface ErpAiAgent { |
| 13 | 13 | @SystemMessage(""" |
| 14 | - 1. 方法匹配:先精准拆解用户查询的核心业务意图,再自动匹配唯一符合用户问题的工具方法(MethodNo),禁止自创,规则如下; | |
| 15 | - 1.1 匹配方法时,无需考虑工具描述(@TOOL)中 1.必填参数,2.选填参数,示例,parameters内容 四个部分的内容; | |
| 16 | - 1.2 匹配方法时,只关注工具描述(@TOOL)中 “当用户” 和 “时,必须调用本工具”两个短语之间的内容; | |
| 17 | - 1.3 调用工具前,不需要询问用户提供缺失的参数 | |
| 18 | - 2. 参数提取:提取该工具的全部参数,与描述完全一致,严格按标注类型赋值,规则如下: | |
| 19 | - 2.1 数字无引号,为空时禁止赋值0; | |
| 20 | - 2.2 如果有空格需要去掉空格后再提取。 | |
| 21 | - """) | |
| 14 | + 1. 方法匹配:先精准拆解用户查询的核心业务意图,再自动匹配唯一符合用户问题的工具方法(MethodNo),禁止自创,规则如下; | |
| 15 | + 1.1 匹配方法时,无需考虑工具描述(@TOOL)中 1.必填参数,2.选填参数,示例,parameters内容 四个部分的内容; | |
| 16 | + 1.2 匹配方法时,只关注工具描述(@TOOL)中 “当用户” 和 “时,必须调用本工具”两个短语之间的内容; | |
| 17 | + 1.3 调用工具前,不需要询问用户提供缺失的参数 | |
| 18 | + 2. 参数提取:提取该工具的全部参数,与描述完全一致,严格按标注类型赋值,规则如下: | |
| 19 | + 2.1 数字无引号,为空时禁止赋值0; | |
| 20 | + 2.2 如果有空格需要去掉空格后再提取。 | |
| 21 | + """) | |
| 22 | 22 | @UserMessage("用户输入:{{userInput}}") |
| 23 | 23 | String chat(@MemoryId String userId, @V("userInput") String userInput); |
| 24 | 24 | |
| ... | ... | @@ -27,16 +27,16 @@ public interface ErpAiAgent { |
| 27 | 27 | * 入参:用户问题、执行的SQL、表结构、JSON格式结果 |
| 28 | 28 | */ |
| 29 | 29 | @SystemMessage(""" |
| 30 | - 你是专业的业务数据分析师,严格遵循以下**通用规则**解释查询结果,适用于所有业务场景: | |
| 31 | - 1. 解释风格:贴合业务场景,无任何SQL专业术语,用口语化、简洁的商业语言说明,避免技术词汇; | |
| 32 | - 2. 数据准确:严格按照JSON执行结果解释,不夸大、不遗漏、不编造数据,数值与结果完全一致; | |
| 33 | - 3. 输出格式:仅返回解释内容,不要列出ID,无多余标题、换行、符号,结果为空时直接返回“未查询到相关数据” | |
| 34 | - 3.1. 所有数字格式必须以纯文本形式输出,严禁使用千分位分隔符(即不要出现逗号 ",")示例:正确写法是 1000000,错误写法是 1,000,000,即使数字很大,也请保持连续的数字串,不要打断。 | |
| 35 | - 3.2 所有日期请转换为 YYYY-MM-DD 格式(例如:2026-03-15),严禁包含时间部分(如小时、分钟、秒)(例如:2026-03-15 00:00:00),也不要包含时区信息。” | |
| 36 | - 3.3. 金额,单价,数量 严禁使用千分位分隔符(即不要出现逗号 ",")示例:正确写法是 2400056,错误写法是 2,400,056 即使数字很大,也请保持连续的数字串,不要打断。 | |
| 37 | - 4. 长度控制:单条解释不超过150字,条理清晰,重点突出核心数据/趋势; | |
| 38 | - 5. 禁止重复:不重复用户问题、不重复执行的SQL语句,仅针对结果做业务解读。 | |
| 39 | - """) | |
| 30 | + 你是专业的业务数据分析师,严格遵循以下**通用规则**解释查询结果,适用于所有业务场景: | |
| 31 | + 1. 解释风格:贴合业务场景,无任何SQL专业术语,用口语化、简洁的商业语言说明,避免技术词汇; | |
| 32 | + 2. 数据准确:严格按照JSON执行结果解释,不夸大、不遗漏、不编造数据,数值与结果完全一致; | |
| 33 | + 3. 输出格式:仅返回解释内容,不要列出ID,无多余标题、换行、符号,结果为空时直接返回“未查询到相关数据” | |
| 34 | + 3.1. 所有数字格式必须以纯文本形式输出,严禁使用千分位分隔符(即不要出现逗号 ",")示例:正确写法是 1000000,错误写法是 1,000,000,即使数字很大,也请保持连续的数字串,不要打断。 | |
| 35 | + 3.2 所有日期请转换为 YYYY-MM-DD 格式(例如:2026-03-15),严禁包含时间部分(如小时、分钟、秒)(例如:2026-03-15 00:00:00),也不要包含时区信息。” | |
| 36 | + 3.3. 金额,单价,数量 严禁使用千分位分隔符(即不要出现逗号 ",")示例:正确写法是 2400056,错误写法是 2,400,056 即使数字很大,也请保持连续的数字串,不要打断。 | |
| 37 | + 4. 长度控制:单条解释不超过150字,条理清晰,重点突出核心数据/趋势; | |
| 38 | + 5. 禁止重复:不重复用户问题、不重复执行的SQL语句,仅针对结果做业务解读。 | |
| 39 | + """) | |
| 40 | 40 | @UserMessage(""" |
| 41 | 41 | 【业务场景表结构信息】 |
| 42 | 42 | 表结构详情:{{tableStruct}} |
| ... | ... | @@ -57,119 +57,143 @@ public interface ErpAiAgent { |
| 57 | 57 | * 入参:用户问题、执行的SQL、表结构、JSON格式结果 |
| 58 | 58 | */ |
| 59 | 59 | @SystemMessage(""" |
| 60 | - 你是专业的业务数据分析师,请分析以下查询结果: | |
| 61 | - 【用户问题】 | |
| 62 | - {{userInput}} | |
| 63 | - 【数据字段说明】 | |
| 64 | - {{sMilvusFiledDescription}} | |
| 65 | - 【查询结果数据(JSON格式)】 | |
| 66 | - {{result}} | |
| 67 | - 【分析要求】 | |
| 68 | - 1. 解释风格:贴合业务场景,无任何SQL专业术语,用口语化、简洁的商业语言说明,避免技术词汇; | |
| 69 | - 2. 数据准确:严格按照JSON执行结果解释,不夸大、不遗漏、不编造数据,数值与结果完全一致; | |
| 70 | - 3. 输出格式: | |
| 71 | - 3.1. 如果用户要求"表格形式展示",先输出简短文字说明,然后输出Markdown格式的表格 | |
| 72 | - 3.2. 如果用户未要求表格,仅返回解释内容,不要列出ID,无多余标题、换行、符号 | |
| 73 | - 3.3. 结果为空时直接返回"未查询到相关数据" | |
| 74 | - 3.4. 所有数字格式必须以纯文本形式输出,严禁使用千分位分隔符(即不要出现逗号 ",") | |
| 75 | - 3.5. 所有日期请转换为 YYYY-MM-DD 格式,严禁包含时间部分 | |
| 76 | - 4. 长度控制:单条解释不超过150字,条理清晰,重点突出核心数据/趋势; | |
| 77 | - 5. 禁止重复:不重复用户问题、不重复执行的SQL语句,仅针对结果做业务解读。 | |
| 78 | - """) | |
| 60 | + 你是专业的业务数据分析师,请分析以下查询结果: | |
| 61 | + 【用户问题】 | |
| 62 | + {{userInput}} | |
| 63 | + 【数据字段说明】 | |
| 64 | + {{sMilvusFiledDescription}} | |
| 65 | + 【查询结果数据(JSON格式)】 | |
| 66 | + {{result}} | |
| 67 | + 【分析要求】 | |
| 68 | + 1. 解释风格:贴合业务场景,无任何SQL专业术语,用口语化、简洁的商业语言说明,避免技术词汇; | |
| 69 | + 2. 数据准确:严格按照JSON执行结果解释,不夸大、不遗漏、不编造数据,数值与结果完全一致; | |
| 70 | + 3. 输出格式: | |
| 71 | + 3.1. 如果用户要求"表格形式展示",先输出简短文字说明,然后输出Markdown格式的表格 | |
| 72 | + 3.2. 如果用户未要求表格,仅返回解释内容,不要列出ID,无多余标题、换行、符号 | |
| 73 | + 3.3. 结果为空时直接返回"未查询到相关数据" | |
| 74 | + 3.4. 所有数字格式必须以纯文本形式输出,严禁使用千分位分隔符(即不要出现逗号 ",") | |
| 75 | + 3.5. 所有日期请转换为 YYYY-MM-DD 格式,严禁包含时间部分 | |
| 76 | + 4. 长度控制:单条解释不超过150字,条理清晰,重点突出核心数据/趋势; | |
| 77 | + 5. 禁止重复:不重复用户问题、不重复执行的SQL语句,仅针对结果做业务解读。 | |
| 78 | + """) | |
| 79 | 79 | @UserMessage(""" |
| 80 | - 【用户查询】 | |
| 81 | - {{userInput}} | |
| 82 | - 【字段说明】 | |
| 83 | - {{sMilvusFiledDescription}} | |
| 84 | - 【查询结果】 | |
| 85 | - 用户原始查询:{{userInput}} | |
| 86 | - 执行查询向量库后结果(JSON格式):{{result}} | |
| 87 | - 请根据上述信息+通用规则,对查询结果做业务解释: | |
| 88 | - """) | |
| 80 | + 【用户查询】 | |
| 81 | + {{userInput}} | |
| 82 | + 【字段说明】 | |
| 83 | + {{sMilvusFiledDescription}} | |
| 84 | + 【查询结果】 | |
| 85 | + 用户原始查询:{{userInput}} | |
| 86 | + 执行查询向量库后结果(JSON格式):{{result}} | |
| 87 | + 请根据上述信息+通用规则,对查询结果做业务解释: | |
| 88 | + """) | |
| 89 | 89 | String explainMilvusResult(@MemoryId String userId, |
| 90 | 90 | @V("userInput") String userInput, |
| 91 | 91 | @V("sMilvusFiledDescription") String sMilvusFiledDescription, |
| 92 | 92 | @V("result") String result); |
| 93 | 93 | |
| 94 | - /** | |
| 95 | - * AI路由判断接口 | |
| 96 | - * true: 走聚合查询(MySQL) | |
| 97 | - * false: 走向量检索(Milvus) | |
| 98 | - */ | |
| 99 | -// @SystemMessage(""" | |
| 100 | -// 你是一个智能查询路由专家,请根据用户需求判断应该使用哪种查询方式。 | |
| 101 | -// | |
| 102 | -// 判断标准: | |
| 103 | -// 1. 返回 true(聚合查询/MySQL)的场景: | |
| 104 | -// - 需要计算统计指标:总数、总和、平均值、最大/最小值、占比 | |
| 105 | -// - 需要数据汇总:分组统计、排行榜、TopN | |
| 106 | -// - 包含关键词:统计、求和、汇总、排名、平均、数量、总额、最高、最低、占比、分组、分析、趋势 | |
| 107 | -// - 示例:统计本月销售总额、查询销量前10的商品、各品类占比分析 | |
| 108 | -// | |
| 109 | -// 2. 返回 false(向量检索/Milvus)的场景: | |
| 110 | -// - 查询明细数据:XXX的销售订单明细、XXX的客户信息、具体内容详情 | |
| 111 | -// - 查找相似内容:根据语义查找相关文档、推荐相似商品 | |
| 112 | -// - 模糊匹配:不确定具体关键词,需要语义理解 | |
| 113 | -// - 内容检索:查找包含特定概念的文档 | |
| 114 | -// - 包含关键词:明细、详情、查询明细、查找、搜索、匹配、推荐、相似、相关、类似 | |
| 115 | -// - 示例:李留记的销售订单明细、查询关于人工智能的文档、找相似的图片 | |
| 116 | -// | |
| 117 | -// 重要规则: | |
| 118 | -// - 只返回 true 或 false,不要返回其他内容 | |
| 119 | -// - 不要解释,不要添加额外文字 | |
| 120 | -// - 如果用户要求"表格形式展示",返回 false(明细查询) | |
| 121 | -// - 如果用户指定具体人名、具体对象,返回 false(明细查询) | |
| 122 | -// """) | |
| 123 | -// @UserMessage("用户需求:{{userInput}}") | |
| 124 | 94 | @SystemMessage(""" |
| 125 | - 你是一个智能查询路由专家。请根据【用户需求】,只返回 true 或 false | |
| 126 | - - 如果用户需求包含以下关键词:统计、求和、汇总、排名、TopN、平均、数量、总额、最高、最低、占比、分组,则返回true | |
| 127 | - - 如果用户需求属于模糊匹配、普通语义检索,查询明细,(例如:查询报价单明细,查询客户信息),则返回false | |
| 128 | - - 查询明细数据:XXX的销售订单明细、XXX的客户信息、具体内容详情,则返回false | |
| 129 | - - 模糊匹配:不确定具体关键词,需要语义理解,则返回false | |
| 95 | + 你是一个智能查询路由专家。请根据【用户需求】,只返回 true 或 false。 | |
| 96 | + | |
| 97 | + 【最高优先级规则 - 必须首先判断】 | |
| 98 | + 如果用户需求包含以下任一关键词,**直接返回 false**,不再进行其他判断: | |
| 99 | + - 明细、详情、详细信息、详细内容、具体内容 | |
| 100 | + - 查询...明细、...详情、...记录、...列表、...清单 | |
| 101 | + | |
| 102 | + 重要:只要出现以上关键词,说明用户需要的是明细数据查询,而非统计分析。 | |
| 103 | + | |
| 104 | + 【统计类关键词 - 仅在未命中最高优先级规则时才判断】 | |
| 105 | + 只有当用户需求不包含上述明细类关键词时,才检查是否包含以下关键词: | |
| 106 | + 统计、求和、汇总、排名、TopN、平均、数量、总额、最高、最低、占比、分组 | |
| 107 | + | |
| 108 | + - 如果包含,返回 true | |
| 109 | + - 否则返回 false | |
| 110 | + | |
| 111 | + 【判断示例】 | |
| 112 | + - \"查询中科精工集团的彩盒类产品的报价单明细\" → false(包含\"明细\") | |
| 113 | + - \"统计各产品销售额\" → true(包含\"统计\",且无明细关键词) | |
| 114 | + - \"查询客户张三信息\" → false(无统计关键词,无明细关键词) | |
| 115 | + - \"销售额排名前10的产品\" → true(包含\"排名\",且无明细关键词) | |
| 116 | + - \"查看销售订单明细\" → false(包含\"明细\") | |
| 130 | 117 | """) |
| 131 | 118 | @UserMessage(""" |
| 132 | - 【用户需求】 | |
| 119 | + 【用户需求】 | |
| 133 | 120 | {{userInput}} |
| 134 | 121 | """) |
| 135 | 122 | Boolean routeQuery(@MemoryId String userId, @V("userInput") String userInput); |
| 136 | 123 | |
| 137 | 124 | /** |
| 138 | - * 生成 Milvus 过滤条件 | |
| 125 | + * 生成 Milvus 过滤条件(适配 Milvus v2.3.9) | |
| 139 | 126 | */ |
| 140 | 127 | @SystemMessage(""" |
| 141 | - MILVUS 标量过滤条件生成规则(严格遵守): | |
| 142 | - 1. 语法规范: | |
| 143 | - - 允许的操作符:==, !=, like | |
| 144 | - - 逻辑组合:&& (AND), || (OR) | |
| 145 | - - 所有字段都是字符串类型,值必须使用单引号包裹 | |
| 146 | - - 字符串中的单引号需要转义:'O''Reilly' | |
| 147 | - 2. 可用字段(只能使用这些字段): | |
| 148 | - - {{sMilvusFiled}} | |
| 149 | - 字段说明: | |
| 150 | - - {{sMilvusFiledDescription}} | |
| 151 | - 3. 重要规则: | |
| 152 | - - 只使用上述可用字段,不要创建新字段 | |
| 153 | - - 如果用户提到了文档类型(如"报价单"、"订单"等),但可用字段中没有类型字段,则忽略该条件 | |
| 154 | - - 只提取有明确值的字段条件 | |
| 155 | - 4. 生成规则: | |
| 156 | - - 如果没有提取到任何具体条件,返回空字符串 | |
| 157 | - - 从用户输入中提取明确的字段条件 | |
| 158 | - - 识别模式:字段名 + 操作符 + 值 | |
| 159 | - - 示例: | |
| 160 | - * "单据号 INV001" → sBillNo == 'INV001' | |
| 161 | - * "客户编号 C001" → sCustomerNo == 'C001' | |
| 162 | - * "销售人员张三" → sSalesManName == '张三' | |
| 163 | - * "产品包含手机" → sProductStyle like '%手机%' | |
| 164 | - 5. 输出格式: | |
| 165 | - - 仅返回纯过滤条件,无任何解释、换行、备注 | |
| 166 | - - 单条件:sBillNo == 'INV001' | |
| 167 | - - 多条件:(sBillNo == 'INV001' && sCustomerNo == 'C001') | |
| 168 | - - 无条件:直接返回空字符串 | |
| 169 | - """) | |
| 170 | - @UserMessage(""" | |
| 128 | + MILVUS 标量过滤条件生成规则(严格遵守 - 当前版本 v2.3.9): | |
| 129 | + | |
| 130 | + 【重要输出约束】 | |
| 131 | + - 必须返回有效的 Milvus 过滤条件表达式 | |
| 132 | + - 禁止返回 true 或 false | |
| 133 | + - 禁止返回空字符串以外的任何非表达式内容 | |
| 134 | + - 无条件时只返回空字符串 "" | |
| 135 | + | |
| 136 | + 1. 语法规范: | |
| 137 | + - 允许的操作符:==, !=, like | |
| 138 | + - 逻辑组合:&& (AND), || (OR) | |
| 139 | + - 所有字段都是字符串类型,值必须使用单引号包裹 | |
| 140 | + - 字符串中的单引号需要转义:'O''Reilly' | |
| 141 | + | |
| 142 | + 2. 【重要】Milvus v2.3.9 like 操作符限制: | |
| 143 | + - ✅ 支持:like '关键字%'(前缀匹配,以关键字开头) | |
| 144 | + - ❌ 不支持:like '%关键字%'(包含匹配) | |
| 145 | + - ❌ 不支持:like '%关键字'(后缀匹配) | |
| 146 | + | |
| 147 | + 3. 可用字段(只能使用这些字段): | |
| 148 | + - {{sMilvusFiled}} | |
| 149 | + 字段说明: | |
| 150 | + - {{sMilvusFiledDescription}} | |
| 151 | + | |
| 152 | + 4. 提取规则: | |
| 153 | + - 只使用上述可用字段,不要创建新字段 | |
| 154 | + - 如果用户提到了文档类型(如"报价单"、"订单"等),但可用字段中没有类型字段,则忽略该条件 | |
| 155 | + | |
| 156 | + 【精确匹配规则】: | |
| 157 | + - 当用户提供明确值时:字段 == '值' | |
| 158 | + * 例如:"客户名称中科精工" → sCustomerName == '中科精工' | |
| 159 | + * 例如:"单据号 INV001" → sBillNo == 'INV001' | |
| 160 | + | |
| 161 | + 5. 时间处理规则: | |
| 162 | + - 当前系统时间:{{sDataNow}}(格式:yyyy-MM-dd) | |
| 163 | + - 相对时间转换规则: | |
| 164 | + * "今天/今日" → 当天 00:00:00 到 23:59:59 | |
| 165 | + * "昨天" → 前一天 00:00:00 到 23:59:59 | |
| 166 | + * "本周" → 本周一 00:00:00 到本周日 23:59:59 | |
| 167 | + * "本月" → 本月1日 00:00:00 到本月最后一天 23:59:59 | |
| 168 | + * "本年" → 本年1月1日 00:00:00 到本年12月31日 23:59:59 | |
| 169 | + * "近X天" → 从 X 天前 00:00:00 到今天 23:59:59 | |
| 170 | + - 日期转时间戳:所有日期转换为 Unix 时间戳(秒) | |
| 171 | + - 时间范围格式:字段 >= 起始时间戳 && 字段 <= 结束时间戳 | |
| 172 | + - 如果没有明确的时间需求,不要添加任何时间过滤条件 | |
| 173 | + | |
| 174 | + 6. 示例: | |
| 175 | + ✅ 正确输出: | |
| 176 | + - "客户名称中科精工" → sCustomerName == '中科精工' | |
| 177 | + - "中科精工的报价单明细" → sCustomerName == '中科精工' | |
| 178 | + - "产品以彩盒开头" → sProductStyle like '彩盒%' | |
| 179 | + - "无条件" → "" | |
| 180 | + | |
| 181 | + ❌ 错误输出(禁止): | |
| 182 | + - "中科精工的报价单明细" → true | |
| 183 | + - "中科精工的报价单明细" → false | |
| 184 | + - "中科精工的报价单明细" → 1 | |
| 185 | + | |
| 186 | + 7. 输出格式: | |
| 187 | + - 仅返回纯过滤条件,无任何解释、换行、备注 | |
| 188 | + - 单条件:sCustomerName == '中科精工' | |
| 189 | + - 多条件:(sCustomerName == '中科精工' && sProductStyle like '彩盒%') | |
| 190 | + - 无条件:直接返回空字符串 "" | |
| 191 | + """) | |
| 192 | + @UserMessage(""" | |
| 171 | 193 | 【用户查询】 |
| 172 | 194 | - {{userInput}} |
| 195 | + 【当前时间】 | |
| 196 | + - {{sDataNow}} | |
| 173 | 197 | 【可用字段】 |
| 174 | 198 | - {{sMilvusFiled}} |
| 175 | 199 | 【字段说明】 |
| ... | ... | @@ -178,5 +202,7 @@ public interface ErpAiAgent { |
| 178 | 202 | String getMilvusFilter(@MemoryId String userId, |
| 179 | 203 | @V("userInput") String userInput, |
| 180 | 204 | @V("sMilvusFiled") String sMilvusFiled, |
| 181 | - @V("sMilvusFiledDescription") String sMilvusFiledDescription); | |
| 205 | + @V("sMilvusFiledDescription") String sMilvusFiledDescription, | |
| 206 | + @V("sDataNow") String sDataNow); | |
| 207 | + | |
| 182 | 208 | } | ... | ... |
src/main/java/com/xly/entity/AiResponseDTO.java
| ... | ... | @@ -139,6 +139,17 @@ public class AiResponseDTO implements Serializable { |
| 139 | 139 | */ |
| 140 | 140 | private String errorCode; |
| 141 | 141 | |
| 142 | + /** | |
| 143 | + * 数据库类型 X: 向量库 S:数据库 | |
| 144 | + */ | |
| 145 | + private String dbType; | |
| 146 | + | |
| 147 | + /** | |
| 148 | + * Cache mode — H: cached, D: dynamic | |
| 149 | + */ | |
| 150 | + private String dbCach; | |
| 151 | + | |
| 152 | + | |
| 142 | 153 | // ============ 便捷方法 ============ |
| 143 | 154 | |
| 144 | 155 | /** | ... | ... |
src/main/java/com/xly/entity/ToolMeta.java
src/main/java/com/xly/entity/UserSceneSession.java
| ... | ... | @@ -67,6 +67,16 @@ public class UserSceneSession { |
| 67 | 67 | private Map<String, Object> args; |
| 68 | 68 | |
| 69 | 69 | /** |
| 70 | + * 数据库类型 X: 向量库 S:数据库 | |
| 71 | + */ | |
| 72 | + private String dbType; | |
| 73 | + | |
| 74 | + /** | |
| 75 | + * 数据库类型 H: 缓存 D: 动态 | |
| 76 | + */ | |
| 77 | + private String dbCach; | |
| 78 | + | |
| 79 | + /** | |
| 70 | 80 | * 构建场景选择提示语:展示权限内场景,引导用户选择 |
| 71 | 81 | * @return 自然语言提示语 |
| 72 | 82 | */ | ... | ... |
src/main/java/com/xly/milvus/service/AiGlobalAgentQuestionSqlEmitterService.java
| ... | ... | @@ -12,7 +12,7 @@ public interface AiGlobalAgentQuestionSqlEmitterService { |
| 12 | 12 | * @return void |
| 13 | 13 | * @Description 插入向量库 |
| 14 | 14 | **/ |
| 15 | - void addAiGlobalAgentQuestionSqlEmitter(String sKey,Map<String,Object> data, String sQuestion, String sSqlContent, String collectionName); | |
| 15 | + void addAiGlobalAgentQuestionSqlEmitter(String sKey,Map<String,Object> data,String sQuestion,String sSqlContent,String cachType,String collectionName); | |
| 16 | 16 | |
| 17 | 17 | |
| 18 | 18 | Map<String, Object> queryAiGlobalAgentQuestionSqlEmitter(String searchText, String collectionName); | ... | ... |
src/main/java/com/xly/milvus/service/MilvusService.java
| ... | ... | @@ -54,5 +54,7 @@ public interface MilvusService { |
| 54 | 54 | * @return java.util.Map<java.lang.String,java.lang.Object> |
| 55 | 55 | * @Description 获取配置 |
| 56 | 56 | **/ |
| 57 | - Map<String,Object> getMilvusFiled(String sVectorfiled,String sVectorfiledAll); | |
| 57 | + Map<String,Object> getMilvusFiled(String sVectorfiled,String sVectorfiledAll,String sVectorfiledShow); | |
| 58 | + | |
| 59 | + boolean isValidMilvusFilter(String milvusFilter); | |
| 58 | 60 | } |
| 59 | 61 | \ No newline at end of file | ... | ... |
src/main/java/com/xly/milvus/service/impl/AiGlobalAgentQuestionSqlEmitterServiceImpl.java
| ... | ... | @@ -2,6 +2,7 @@ package com.xly.milvus.service.impl; |
| 2 | 2 | |
| 3 | 3 | import cn.hutool.core.collection.ConcurrentHashSet; |
| 4 | 4 | import cn.hutool.core.util.ObjectUtil; |
| 5 | +import cn.hutool.core.util.StrUtil; | |
| 5 | 6 | import com.google.gson.JsonArray; |
| 6 | 7 | import com.google.gson.JsonObject; |
| 7 | 8 | import com.xly.milvus.service.AiGlobalAgentQuestionSqlEmitterService; |
| ... | ... | @@ -52,16 +53,19 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent |
| 52 | 53 | * @Description 插入数据 |
| 53 | 54 | **/ |
| 54 | 55 | @Override |
| 55 | - public void addAiGlobalAgentQuestionSqlEmitter(String sKey,Map<String,Object> data,String sQuestion,String sSqlContent,String collectionName) { | |
| 56 | + public void addAiGlobalAgentQuestionSqlEmitter(String sKey,Map<String,Object> data,String sQuestion,String sSqlContent,String cachType,String collectionName) { | |
| 56 | 57 | // 向量化 |
| 57 | 58 | List<Float> vector = vectorizationService.textToVector(sKey); |
| 58 | 59 | |
| 59 | 60 | if (vector == null || vector.isEmpty()) { |
| 60 | 61 | throw new RuntimeException("向量化失败"); |
| 61 | 62 | } |
| 63 | + if(ObjectUtil.isEmpty(sSqlContent)){ | |
| 64 | + sSqlContent = StrUtil.EMPTY; | |
| 65 | + } | |
| 62 | 66 | |
| 63 | 67 | // 2. 转换为Milvus格式 |
| 64 | - JsonObject row = convertToMilvusRow(data, vector,sQuestion,sSqlContent,sKey); | |
| 68 | + JsonObject row = convertToMilvusRow(data, vector,sQuestion,sSqlContent,cachType,sKey); | |
| 65 | 69 | |
| 66 | 70 | //创建集合 |
| 67 | 71 | // createCollection(collectionName); |
| ... | ... | @@ -86,7 +90,7 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent |
| 86 | 90 | Map<String, Object> searchParams = new HashMap<>(); |
| 87 | 91 | searchParams.put("nprobe", 10); |
| 88 | 92 | // 对于 IP 度量,相似度范围在 [minScore, maxScore] |
| 89 | - searchParams.put("radius", 0.9); // 最小相似度 | |
| 93 | + searchParams.put("radius", 0.98); // 最小相似度 | |
| 90 | 94 | searchParams.put("range_filter", 1); // 最大相似度 |
| 91 | 95 | // 1. 确保集合已加载 |
| 92 | 96 | ensureCollectionLoaded(collectionName); |
| ... | ... | @@ -108,7 +112,6 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent |
| 108 | 112 | // .limit(100) |
| 109 | 113 | // .build(); |
| 110 | 114 | // QueryResp queryResp = milvusClient.query(queryReq); |
| 111 | - | |
| 112 | 115 | // 3. 创建 Milvus FloatVec 对象 |
| 113 | 116 | FloatVec floatVec = new FloatVec(floatArray); |
| 114 | 117 | // 4. 构建搜索请求 |
| ... | ... | @@ -118,7 +121,7 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent |
| 118 | 121 | .annsField("vector") // 向量字段名 |
| 119 | 122 | .topK(10) // 返回最相似的10条 |
| 120 | 123 | .metricType(IndexParam.MetricType.IP) // 内积相似度 |
| 121 | - .outputFields(Arrays.asList("sQuestion", "sSqlContent", "data_id", "create_time","metadata")) | |
| 124 | + .outputFields(Arrays.asList("sQuestion", "sSqlContent", "data_id","db_name", "create_time","metadata")) | |
| 122 | 125 | .searchParams(searchParams) |
| 123 | 126 | .build(); |
| 124 | 127 | // 5. 执行搜索 |
| ... | ... | @@ -230,7 +233,7 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent |
| 230 | 233 | /** |
| 231 | 234 | * 从实体对象构建Milvus插入数据 |
| 232 | 235 | */ |
| 233 | - public JsonObject convertToMilvusRow(Map<String,Object> data, List<Float> vector,String sQuestion,String sSqlContent,String sKey) { | |
| 236 | + public JsonObject convertToMilvusRow(Map<String,Object> data, List<Float> vector,String sQuestion,String sSqlContent,String cachType,String sKey) { | |
| 234 | 237 | JsonObject row = new JsonObject(); |
| 235 | 238 | |
| 236 | 239 | // 添加向量 |
| ... | ... | @@ -242,6 +245,7 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent |
| 242 | 245 | row.addProperty("data_id", data.get("sId").toString()); |
| 243 | 246 | row.addProperty("sQuestion", sQuestion); |
| 244 | 247 | row.addProperty("sSqlContent", sSqlContent); |
| 248 | + row.addProperty("cachType", cachType); | |
| 245 | 249 | // 创建时间字段 - 必须提供! |
| 246 | 250 | row.addProperty("create_time", System.currentTimeMillis()); |
| 247 | 251 | // 创建时间字段 - 必须提供! |
| ... | ... | @@ -305,23 +309,33 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent |
| 305 | 309 | CreateCollectionReq.FieldSchema.builder() |
| 306 | 310 | .name("sQuestion") |
| 307 | 311 | .dataType(DataType.VarChar) |
| 308 | - .maxLength(1000) | |
| 312 | + .maxLength(5000) | |
| 309 | 313 | .description("用户问题") |
| 310 | 314 | .build(), |
| 311 | 315 | |
| 312 | - // 4. SQL内容字段 | |
| 316 | + // 4. SQL内容字段 - 设置为可空 | |
| 313 | 317 | CreateCollectionReq.FieldSchema.builder() |
| 314 | 318 | .name("sSqlContent") |
| 315 | 319 | .dataType(DataType.VarChar) |
| 316 | - .maxLength(5000) // SQL可能较长 | |
| 320 | + .maxLength(50000) // SQL可能较长 | |
| 321 | + .isPrimaryKey(false) | |
| 322 | + .isNullable(true) // 设置为 true,允许为空 | |
| 317 | 323 | .description("SQL语句") |
| 318 | 324 | .build(), |
| 319 | 325 | |
| 326 | + // 4. 缓存类型 | |
| 327 | + CreateCollectionReq.FieldSchema.builder() | |
| 328 | + .name("cachType") | |
| 329 | + .dataType(DataType.VarChar) | |
| 330 | + .maxLength(100) // 缓存类型 | |
| 331 | + .description("缓存类型") | |
| 332 | + .build(), | |
| 333 | + | |
| 320 | 334 | // 5. 数据ID字段 |
| 321 | 335 | CreateCollectionReq.FieldSchema.builder() |
| 322 | 336 | .name("data_id") |
| 323 | 337 | .dataType(DataType.VarChar) |
| 324 | - .maxLength(100) | |
| 338 | + .maxLength(500) // 增加最大长度 | |
| 325 | 339 | .description("原始数据ID") |
| 326 | 340 | .build(), |
| 327 | 341 | |
| ... | ... | @@ -341,7 +355,7 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent |
| 341 | 355 | CreateCollectionReq.FieldSchema.builder() |
| 342 | 356 | .name("sKey") |
| 343 | 357 | .dataType(DataType.VarChar) |
| 344 | - .maxLength(100) | |
| 358 | + .maxLength(1000) // 增加最大长度 | |
| 345 | 359 | .description("存入的vector转换前数据") |
| 346 | 360 | .build() |
| 347 | 361 | ); |
| ... | ... | @@ -667,7 +681,7 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent |
| 667 | 681 | IndexParam questionIndex = IndexParam.builder() |
| 668 | 682 | .fieldName("sQuestion") |
| 669 | 683 | .indexName("idx_question") |
| 670 | - .indexType(IndexParam.IndexType.INVERTED) // 倒排索引 | |
| 684 | + .indexType(IndexParam.IndexType.TRIE) // Trie (prefix tree) index for VarChar fields | |
| 671 | 685 | .build(); |
| 672 | 686 | allIndexParams.add(questionIndex); |
| 673 | 687 | |
| ... | ... | @@ -675,14 +689,21 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent |
| 675 | 689 | IndexParam idIndex = IndexParam.builder() |
| 676 | 690 | .fieldName("data_id") |
| 677 | 691 | .indexName("idx_data_id") |
| 678 | - .indexType(IndexParam.IndexType.INVERTED) | |
| 692 | + .indexType(IndexParam.IndexType.TRIE) | |
| 679 | 693 | .build(); |
| 680 | 694 | allIndexParams.add(idIndex); |
| 681 | 695 | |
| 696 | + IndexParam idx_cach_type = IndexParam.builder() | |
| 697 | + .fieldName("cachType") | |
| 698 | + .indexName("idx_cach_type") | |
| 699 | + .indexType(IndexParam.IndexType.TRIE) | |
| 700 | + .build(); | |
| 701 | + allIndexParams.add(idx_cach_type); | |
| 702 | + | |
| 682 | 703 | IndexParam sKey = IndexParam.builder() |
| 683 | 704 | .fieldName("sKey") |
| 684 | 705 | .indexName("s_key") |
| 685 | - .indexType(IndexParam.IndexType.INVERTED) | |
| 706 | + .indexType(IndexParam.IndexType.TRIE) | |
| 686 | 707 | .build(); |
| 687 | 708 | allIndexParams.add(sKey); |
| 688 | 709 | ... | ... |
src/main/java/com/xly/milvus/service/impl/MilvusServiceImpl.java
| ... | ... | @@ -264,24 +264,29 @@ public class MilvusServiceImpl implements MilvusService { |
| 264 | 264 | * @Description 返回组装动态内容 |
| 265 | 265 | **/ |
| 266 | 266 | @Override |
| 267 | - public Map<String,Object> getMilvusFiled(String sVectorfiled,String sVectorfiledAll){ | |
| 267 | + public Map<String,Object> getMilvusFiled(String sVectorfiled,String sVectorfiledAll,String sVectorfiledShow){ | |
| 268 | 268 | List<String> sFileds = new ArrayList<>(); |
| 269 | + List<String> filedsShow = new ArrayList<>(); | |
| 269 | 270 | List<String> sFiledDescriptions = new ArrayList<>(); |
| 270 | 271 | List<String> sFiledDescriptionsAll = new ArrayList<>(); |
| 271 | 272 | List<Map<String,String>> titleList = new LinkedList<>(); |
| 272 | 273 | String[] sVectorfiledArray = sVectorfiled.split(","); |
| 273 | 274 | for(String sVectorfiledOne : sVectorfiledArray){ |
| 274 | - Map<String,String> title = new HashMap<>(); | |
| 275 | - | |
| 276 | 275 | String[] sVectorfiledOneArray = sVectorfiledOne.split(":"); |
| 277 | 276 | String sDescriptions = sVectorfiledOneArray[0]; |
| 278 | 277 | String sName = sVectorfiledOneArray[1]; |
| 279 | 278 | sFileds.add(sName); |
| 280 | 279 | // 处理描述中可能包含的换行,保持缩进一致 |
| 281 | -// String formattedDesc = sDescriptions.replace("\n", "\n "); | |
| 282 | -// sFiledDescriptions.add(String.format(" - %s: %s", sName, formattedDesc)); | |
| 283 | 280 | String formattedDesc =String.format("%s: %s", sName, sDescriptions); |
| 284 | 281 | sFiledDescriptions.add(formattedDesc); |
| 282 | + } | |
| 283 | + String[] sVectorfiledShowArray = sVectorfiledShow.split(","); | |
| 284 | + for(String sVectorfiledShowOne : sVectorfiledShowArray){ | |
| 285 | + Map<String,String> title = new HashMap<>(4); | |
| 286 | + String[] sVectorfiledOneArray = sVectorfiledShowOne.split(":"); | |
| 287 | + String sDescriptions = sVectorfiledOneArray[0]; | |
| 288 | + String sName = sVectorfiledOneArray[1]; | |
| 289 | + filedsShow.add(sName); | |
| 285 | 290 | title.put("sName",sName); |
| 286 | 291 | title.put("sTitle",sDescriptions); |
| 287 | 292 | titleList.add(title); |
| ... | ... | @@ -291,9 +296,6 @@ public class MilvusServiceImpl implements MilvusService { |
| 291 | 296 | String[] sVectorfiledOneArray = sVectorfiledOne.split(":"); |
| 292 | 297 | String sDescriptions = sVectorfiledOneArray[0]; |
| 293 | 298 | String sName = sVectorfiledOneArray[1]; |
| 294 | - // 处理描述中可能包含的换行,保持缩进一致 | |
| 295 | -// String formattedDesc = sDescriptions.replace("\n", "\n "); | |
| 296 | -// sFiledDescriptions.add(String.format(" - %s: %s", sName, formattedDesc)); | |
| 297 | 299 | String formattedDesc =String.format("%s: %s", sName, sDescriptions); |
| 298 | 300 | sFiledDescriptionsAll.add(formattedDesc); |
| 299 | 301 | } |
| ... | ... | @@ -301,7 +303,7 @@ public class MilvusServiceImpl implements MilvusService { |
| 301 | 303 | rMap.put("sMilvusFiled", String.join(",", sFileds)); |
| 302 | 304 | rMap.put("sMilvusFiledDescription", String.join(",", sFiledDescriptions)); |
| 303 | 305 | rMap.put("sMilvusFiledDescriptionAll", String.join(",", sFiledDescriptionsAll)); |
| 304 | - rMap.put("sFileds", sFileds); | |
| 306 | + rMap.put("filedsShow", filedsShow); | |
| 305 | 307 | rMap.put("title", titleList); |
| 306 | 308 | return rMap; |
| 307 | 309 | } |
| ... | ... | @@ -334,9 +336,6 @@ public class MilvusServiceImpl implements MilvusService { |
| 334 | 336 | fields.add("metadata"); |
| 335 | 337 | // 3. 创建 Milvus FloatVec 对象 |
| 336 | 338 | FloatVec floatVec = new FloatVec(floatArray); |
| 337 | - log.info("查询向量库条件{}",milvusFilter); | |
| 338 | - milvusFilter = isValidMilvusFilter(milvusFilter)?milvusFilter : null; | |
| 339 | - log.info("实际查询向量库条件{}",milvusFilter); | |
| 340 | 339 | // 4. 构建搜索请求 |
| 341 | 340 | SearchReq searchReq = SearchReq.builder() |
| 342 | 341 | .collectionName(collectionName) |
| ... | ... | @@ -357,7 +356,7 @@ public class MilvusServiceImpl implements MilvusService { |
| 357 | 356 | |
| 358 | 357 | |
| 359 | 358 | /** |
| 360 | - * 判断 Milvus 过滤条件是否有效 | |
| 359 | + * 判断 Milvus 过滤条件是否有效(支持 TEXT_MATCH 全文检索) | |
| 361 | 360 | * @param milvusFilter 过滤条件字符串 |
| 362 | 361 | * @return true: 有效条件, false: 无效条件 |
| 363 | 362 | */ |
| ... | ... | @@ -374,42 +373,135 @@ public class MilvusServiceImpl implements MilvusService { |
| 374 | 373 | return false; |
| 375 | 374 | } |
| 376 | 375 | |
| 377 | - // 3. 检查是否包含有效的操作符 | |
| 378 | - boolean hasValidOperator = filter.matches(".*[=!<>]=?.*") || filter.contains(" like "); | |
| 376 | + // 3. 【修改】检查是否包含有效的操作符(增加 TEXT_MATCH 支持) | |
| 377 | + boolean hasValidOperator = filter.matches(".*[=!<>]=?.*") | |
| 378 | + || filter.contains(" like ") | |
| 379 | + || filter.toUpperCase().contains("TEXT_MATCH"); | |
| 380 | + | |
| 379 | 381 | if (!hasValidOperator) { |
| 380 | 382 | return false; |
| 381 | 383 | } |
| 382 | 384 | |
| 383 | - // 4. 检查字符串值是否使用单引号包裹 | |
| 384 | - // 匹配模式:字段名 操作符 '值' | |
| 385 | - Pattern pattern = Pattern.compile("[a-zA-Z_][a-zA-Z0-9_]*\\s*(==|!=|>=|<=|>|<|like)\\s*('[^']*'|\\d+)"); | |
| 386 | - Matcher matcher = pattern.matcher(filter); | |
| 387 | - | |
| 388 | - // 5. 对于复合条件,递归检查 | |
| 385 | + // 4. 对于复合条件,递归检查 | |
| 389 | 386 | if (filter.contains("&&") || filter.contains("||")) { |
| 390 | 387 | // 分割复合条件(简单处理,生产环境需要更完善的解析) |
| 391 | - String[] conditions = filter.split("&&|\\|\\|"); | |
| 388 | + String[] conditions = splitConditions(filter); | |
| 392 | 389 | for (String condition : conditions) { |
| 393 | - condition = condition.trim().replaceAll("^[()]+|[()]+$", ""); // 去除括号 | |
| 394 | - if (!isValidSimpleCondition(condition)) { | |
| 390 | + if (!isValidCondition(condition)) { | |
| 395 | 391 | return false; |
| 396 | 392 | } |
| 397 | 393 | } |
| 398 | 394 | return true; |
| 399 | 395 | } |
| 400 | 396 | |
| 401 | - // 6. 检查简单条件 | |
| 402 | - return isValidSimpleCondition(filter); | |
| 397 | + // 5. 检查单个条件 | |
| 398 | + return isValidCondition(filter); | |
| 399 | + } | |
| 400 | + | |
| 401 | + /** | |
| 402 | + * Splits a compound filter into sub-conditions at every "&&" or "||" found outside parentheses (depth 0); parenthesised text is kept intact and each piece is trimmed, so a filter wrapped entirely in one pair of parentheses comes back as a single element. NOTE(review): quote characters are not tracked, so an operator or parenthesis inside a quoted value is treated as syntax - confirm this is acceptable. | |
| 403 | + */ | |
| 404 | + private String[] splitConditions(String filter) { | |
| 405 | + List<String> conditions = new ArrayList<>(); | |
| 406 | + StringBuilder current = new StringBuilder(); | |
| 407 | + int parentheses = 0; | |
| 408 | + | |
| 409 | + for (int i = 0; i < filter.length(); i++) { | |
| 410 | + char c = filter.charAt(i); | |
| 411 | + | |
| 412 | + if (c == '(') { | |
| 413 | + parentheses++; | |
| 414 | + current.append(c); | |
| 415 | + } else if (c == ')') { | |
| 416 | + parentheses--; | |
| 417 | + current.append(c); | |
| 418 | + } else if (parentheses == 0 && (filter.startsWith("&&", i) || filter.startsWith("||", i))) { | |
| 419 | + // Top-level operator reached: flush the text accumulated so far as one condition | |
| 420 | + if (current.length() > 0) { | |
| 421 | + conditions.add(current.toString().trim()); | |
| 422 | + current = new StringBuilder(); | |
| 423 | + } | |
| 424 | + i += 1; // skip the second character of the two-character operator | |
| 425 | + } else { | |
| 426 | + current.append(c); | |
| 427 | + } | |
| 428 | + } | |
| 429 | + | |
| 430 | + if (current.length() > 0) { | |
| 431 | + conditions.add(current.toString().trim()); | |
| 432 | + } | |
| 433 | + | |
| 434 | + return conditions.toArray(new String[0]); | |
| 403 | 435 | } |
| 404 | 436 | |
| 405 | 437 | /** |
| 406 | - * 验证简单条件(不包含 && 和 ||) | |
| 438 | + * Validates one condition: returns false for null or blank input; otherwise trims it, strips wrapping parentheses, and delegates to isValidTextMatch when the text contains TEXT_MATCH (any letter case) or to isValidSimpleCondition otherwise. | |
| 439 | + */ | |
| 440 | + private boolean isValidCondition(String condition) { | |
| 441 | + if (condition == null || condition.trim().isEmpty()) { | |
| 442 | + return false; | |
| 443 | + } | |
| 444 | + | |
| 445 | + condition = condition.trim(); | |
| 446 | + // Strip wrapping parentheses. NOTE(review): only the first and last characters are checked, not whether they form a matching pair, so input shaped like "(a) || (b)" would lose unrelated parentheses - confirm callers never pass that shape. | |
| 447 | + while (condition.startsWith("(") && condition.endsWith(")")) { | |
| 448 | + condition = condition.substring(1, condition.length() - 1).trim(); | |
| 449 | + } | |
| 450 | + | |
| 451 | + // 1. Conditions that mention TEXT_MATCH (case-insensitive) go to the TEXT_MATCH validator | |
| 452 | + if (condition.toUpperCase().contains("TEXT_MATCH")) { | |
| 453 | + return isValidTextMatch(condition); | |
| 454 | + } | |
| 455 | + | |
| 456 | + // 2. Everything else is validated as a plain comparison condition | |
| 457 | + return isValidSimpleCondition(condition); | |
| 458 | + } | |
| 459 | + | |
| 460 | + /** | |
| 461 | + * 【新增】验证 TEXT_MATCH 语法 | |
| 462 | + * 格式:TEXT_MATCH(字段名, '关键词') | |
| 463 | + * 或:TEXT_MATCH(字段名, "关键词") | |
| 464 | + */ | |
| 465 | + private boolean isValidTextMatch(String condition) { | |
| 466 | + // 匹配 TEXT_MATCH(字段名, '关键词') 或 TEXT_MATCH(字段名, "关键词") | |
| 467 | + Pattern pattern = Pattern.compile( | |
| 468 | + "TEXT_MATCH\\s*\\(\\s*([a-zA-Z_][a-zA-Z0-9_]*)\\s*,\\s*['\"]([^'\"]*)['\"]\\s*\\)", | |
| 469 | + Pattern.CASE_INSENSITIVE | |
| 470 | + ); | |
| 471 | + Matcher matcher = pattern.matcher(condition); | |
| 472 | + | |
| 473 | + if (!matcher.matches()) { | |
| 474 | + log.warn("无效的 TEXT_MATCH 语法: {}", condition); | |
| 475 | + return false; | |
| 476 | + } | |
| 477 | + | |
| 478 | + String fieldName = matcher.group(1); | |
| 479 | + String keyword = matcher.group(2); | |
| 480 | + | |
| 481 | + // 检查字段名不能为空 | |
| 482 | + if (fieldName == null || fieldName.trim().isEmpty()) { | |
| 483 | + log.warn("TEXT_MATCH 字段名不能为空: {}", condition); | |
| 484 | + return false; | |
| 485 | + } | |
| 486 | + | |
| 487 | + // 检查关键词不能为空 | |
| 488 | + if (keyword == null || keyword.trim().isEmpty()) { | |
| 489 | + log.warn("TEXT_MATCH 关键词不能为空: {}", condition); | |
| 490 | + return false; | |
| 491 | + } | |
| 492 | + | |
| 493 | + return true; | |
| 494 | + } | |
| 495 | + | |
| 496 | + /** | |
| 497 | + * 验证简单条件(不包含 && 和 ||,不包含 TEXT_MATCH) | |
| 407 | 498 | */ |
| 408 | 499 | private boolean isValidSimpleCondition(String condition) { |
| 409 | 500 | if (condition == null || condition.trim().isEmpty()) { |
| 410 | 501 | return false; |
| 411 | 502 | } |
| 412 | 503 | condition = condition.trim(); |
| 504 | + | |
| 413 | 505 | // 匹配简单条件的正则 |
| 414 | 506 | // 格式:字段名 操作符 值 |
| 415 | 507 | // 字段名:字母开头,包含字母数字下划线 |
| ... | ... | @@ -422,22 +514,27 @@ public class MilvusServiceImpl implements MilvusService { |
| 422 | 514 | if (!condition.matches(regex)) { |
| 423 | 515 | return false; |
| 424 | 516 | } |
| 425 | - // 额外检查:like 操作符的值必须包含 % | |
| 517 | + | |
| 518 | + // 【修改】额外检查:like 操作符的限制 | |
| 426 | 519 | if (condition.contains(" like ")) { |
| 427 | 520 | String value = condition.split("like")[1].trim(); |
| 428 | 521 | if (!value.contains("%")) { |
| 429 | - return false; // like 必须使用 % 通配符 | |
| 522 | + log.warn("like 操作符必须包含 % 通配符: {}", condition); | |
| 523 | + return false; | |
| 524 | + } | |
| 525 | + | |
| 526 | + // 【新增】检查是否包含前后都有通配符的模式(Milvus 不支持) | |
| 527 | + if (value.matches("'%.*%'")) { | |
| 528 | + log.warn("Milvus 不支持前后都有通配符的 like: {}", condition); | |
| 529 | + return false; | |
| 430 | 530 | } |
| 431 | 531 | } |
| 532 | + | |
| 432 | 533 | return true; |
| 433 | 534 | } |
| 434 | 535 | |
| 435 | - | |
| 436 | - /** | |
| 437 | - * 处理 Milvus 查询结果(完整版) | |
| 438 | - */ | |
| 439 | 536 | /** |
| 440 | - * 处理 Milvus 查询结果(完整版) | |
| 537 | + * 处理 Milvus 查询结果 | |
| 441 | 538 | */ |
| 442 | 539 | private List<Map<String, Object>> processMilvusResults(SearchResp response) { |
| 443 | 540 | List<Map<String, Object>> results = new ArrayList<>(); |
| ... | ... | @@ -758,6 +855,7 @@ public class MilvusServiceImpl implements MilvusService { |
| 758 | 855 | .description(sDescription) |
| 759 | 856 | .isPrimaryKey(false) // 如果不是主键 |
| 760 | 857 | .isNullable(true) // 允许为空 |
| 858 | + | |
| 761 | 859 | // .defaultValue("") // 如果有默认值 |
| 762 | 860 | // SQL可能较长 |
| 763 | 861 | .maxLength(1000) | ... | ... |
src/main/java/com/xly/ocr/service/OcrService.java
0 → 100644
| 1 | +package com.xly.ocr.service; | |
| 2 | + | |
| 3 | +import net.sourceforge.tess4j.Tesseract; | |
| 4 | +import net.sourceforge.tess4j.TesseractException; | |
| 5 | +import org.slf4j.Logger; | |
| 6 | +import org.slf4j.LoggerFactory; | |
| 7 | +import org.springframework.beans.factory.annotation.Value; | |
| 8 | +import org.springframework.stereotype.Service; | |
| 9 | +import org.springframework.web.multipart.MultipartFile; | |
| 10 | + | |
| 11 | +import javax.imageio.ImageIO; | |
| 12 | +import java.awt.*; | |
| 13 | +import java.awt.image.BufferedImage; | |
| 14 | +import java.io.File; | |
| 15 | +import java.io.IOException; | |
| 16 | +import java.nio.file.Files; | |
| 17 | +import java.nio.file.Path; | |
| 18 | +import java.util.Arrays; | |
| 19 | +import java.util.List; | |
| 20 | + | |
| 21 | +@Service | |
| 22 | +public class OcrService { | |
| 23 | + | |
| 24 | + private static final Logger logger = LoggerFactory.getLogger(OcrService.class); | |
| 25 | + | |
| 26 | + private final Tesseract tesseract; | |
| 27 | + | |
| 28 | + // 配置参数 | |
| 29 | + private static final List<String> ALLOWED_EXTENSIONS = Arrays.asList(".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".gif"); | |
| 30 | + private static final long MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB | |
| 31 | + private static final int BINARIZE_THRESHOLD = 127; | |
| 32 | + private static final int MIN_WIDTH = 800; | |
| 33 | + private static final int MIN_HEIGHT = 200; | |
| 34 | + | |
| 35 | + // 性能统计 | |
| 36 | + private static class OcrStats { | |
| 37 | + long preprocessTime = 0; | |
| 38 | + long ocrTime = 0; | |
| 39 | + String imageSize = ""; | |
| 40 | + | |
| 41 | + @Override | |
| 42 | + public String toString() { | |
| 43 | + return String.format("预处理耗时: %dms, OCR耗时: %dms, 图片尺寸: %s", | |
| 44 | + preprocessTime, ocrTime, imageSize); | |
| 45 | + } | |
| 46 | + } | |
| 47 | + | |
/**
 * Creates the service and its Tesseract engine.
 *
 * @param dataPath value of the {@code tesseract.datapath} property, handed to
 *                 {@code Tesseract.setDatapath}
 */
public OcrService(@Value("${tesseract.datapath}") String dataPath) {
    Tesseract engine = new Tesseract();
    // Basic setup: language data location and the languages to load.
    engine.setDatapath(dataPath);
    engine.setLanguage("chi_sim+eng");
    this.tesseract = engine;

    // Recognition tuning.
    configureTesseract();

    logger.info("Tesseract 初始化完成,语言包路径: {}, 语言: chi_sim+eng", dataPath);
}
| 60 | + | |
| 61 | + /** | |
| 62 | + * 配置 Tesseract 参数 | |
| 63 | + */ | |
| 64 | + private void configureTesseract() { | |
| 65 | + // 页面分割模式:3 = 自动页面分割,但没有方向检测 | |
| 66 | + this.tesseract.setPageSegMode(3); | |
| 67 | + | |
| 68 | + // OCR 引擎模式:3 = 默认,基于 LSTM 和传统引擎 | |
| 69 | + this.tesseract.setOcrEngineMode(3); | |
| 70 | + | |
| 71 | + // 提高中文识别率 | |
| 72 | + this.tesseract.setVariable("preserve_interword_spaces", "1"); | |
| 73 | + this.tesseract.setVariable("textord_force_make_prop_words", "true"); | |
| 74 | + | |
| 75 | + // 可选:设置字符白名单(根据需要启用) | |
| 76 | + // this.tesseract.setVariable("tessedit_char_whitelist", | |
| 77 | + // "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ,。!?;:\"‘’“”【】()《》"); | |
| 78 | + | |
| 79 | + // 可选:设置黑名单(排除干扰字符) | |
| 80 | + // this.tesseract.setVariable("tessedit_char_blacklist", "|\\/`~@#$%^&*()_+={}[]"); | |
| 81 | + } | |
| 82 | + | |
| 83 | + /** | |
| 84 | + * 图片预处理 - 优化的处理流程 | |
| 85 | + */ | |
| 86 | + private BufferedImage preprocessImage(BufferedImage originalImage) { | |
| 87 | + if (originalImage == null) { | |
| 88 | + return null; | |
| 89 | + } | |
| 90 | + | |
| 91 | + try { | |
| 92 | + long startTime = System.currentTimeMillis(); | |
| 93 | + | |
| 94 | + // 1. 自动调整亮度和对比度 | |
| 95 | + BufferedImage adjusted = autoAdjustBrightnessContrast(originalImage); | |
| 96 | + | |
| 97 | + // 2. 灰度化 | |
| 98 | + BufferedImage grayImage = toGray(adjusted); | |
| 99 | + | |
| 100 | + // 3. 自适应二值化(比固定阈值更好) | |
| 101 | + BufferedImage binaryImage = adaptiveBinarize(grayImage); | |
| 102 | + | |
| 103 | + // 4. 降噪处理 | |
| 104 | + BufferedImage denoisedImage = denoise(binaryImage); | |
| 105 | + | |
| 106 | + // 5. 放大图片(如果太小) | |
| 107 | + BufferedImage scaledImage = scaleImageIfNeeded(denoisedImage); | |
| 108 | + | |
| 109 | + // 6. 可选:边缘增强(提高清晰度) | |
| 110 | + BufferedImage enhancedImage = sharpen(scaledImage); | |
| 111 | + | |
| 112 | + long endTime = System.currentTimeMillis(); | |
| 113 | + logger.debug("图片预处理耗时: {}ms", endTime - startTime); | |
| 114 | + | |
| 115 | + return enhancedImage; | |
| 116 | + | |
| 117 | + } catch (Exception e) { | |
| 118 | + logger.error("图片预处理失败: {}", e.getMessage(), e); | |
| 119 | + return originalImage; | |
| 120 | + } | |
| 121 | + } | |
| 122 | + | |
| 123 | + /** | |
| 124 | + * 自动调整亮度和对比度 | |
| 125 | + */ | |
| 126 | + private BufferedImage autoAdjustBrightnessContrast(BufferedImage image) { | |
| 127 | + BufferedImage result = new BufferedImage( | |
| 128 | + image.getWidth(), image.getHeight(), image.getType()); | |
| 129 | + | |
| 130 | + // 计算亮度直方图 | |
| 131 | + int[] histogram = new int[256]; | |
| 132 | + for (int y = 0; y < image.getHeight(); y++) { | |
| 133 | + for (int x = 0; x < image.getWidth(); x++) { | |
| 134 | + int rgb = image.getRGB(x, y); | |
| 135 | + int gray = (int)((rgb >> 16 & 0xFF) * 0.299 + | |
| 136 | + (rgb >> 8 & 0xFF) * 0.587 + | |
| 137 | + (rgb & 0xFF) * 0.114); | |
| 138 | + histogram[gray]++; | |
| 139 | + } | |
| 140 | + } | |
| 141 | + | |
| 142 | + // 找到黑色和白色的阈值 | |
| 143 | + int total = image.getWidth() * image.getHeight(); | |
| 144 | + int blackThreshold = 0; | |
| 145 | + int whiteThreshold = 255; | |
| 146 | + | |
| 147 | + int sum = 0; | |
| 148 | + for (int i = 0; i < 256; i++) { | |
| 149 | + sum += histogram[i]; | |
| 150 | + if (sum > total * 0.05) { | |
| 151 | + blackThreshold = i; | |
| 152 | + break; | |
| 153 | + } | |
| 154 | + } | |
| 155 | + | |
| 156 | + sum = 0; | |
| 157 | + for (int i = 255; i >= 0; i--) { | |
| 158 | + sum += histogram[i]; | |
| 159 | + if (sum > total * 0.05) { | |
| 160 | + whiteThreshold = i; | |
| 161 | + break; | |
| 162 | + } | |
| 163 | + } | |
| 164 | + | |
| 165 | + // 应用对比度拉伸 | |
| 166 | + for (int y = 0; y < image.getHeight(); y++) { | |
| 167 | + for (int x = 0; x < image.getWidth(); x++) { | |
| 168 | + int rgb = image.getRGB(x, y); | |
| 169 | + int r = (rgb >> 16) & 0xFF; | |
| 170 | + int g = (rgb >> 8) & 0xFF; | |
| 171 | + int b = rgb & 0xFF; | |
| 172 | + | |
| 173 | + // 拉伸到 0-255 范围 | |
| 174 | + r = stretchValue(r, blackThreshold, whiteThreshold); | |
| 175 | + g = stretchValue(g, blackThreshold, whiteThreshold); | |
| 176 | + b = stretchValue(b, blackThreshold, whiteThreshold); | |
| 177 | + | |
| 178 | + result.setRGB(x, y, (r << 16) | (g << 8) | b); | |
| 179 | + } | |
| 180 | + } | |
| 181 | + | |
| 182 | + return result; | |
| 183 | + } | |
| 184 | + | |
| 185 | + private int stretchValue(int value, int black, int white) { | |
| 186 | + if (value <= black) return 0; | |
| 187 | + if (value >= white) return 255; | |
| 188 | + return (value - black) * 255 / (white - black); | |
| 189 | + } | |
| 190 | + | |
| 191 | + /** | |
| 192 | + * 灰度化 | |
| 193 | + */ | |
| 194 | + private BufferedImage toGray(BufferedImage image) { | |
| 195 | + BufferedImage result = new BufferedImage( | |
| 196 | + image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY); | |
| 197 | + Graphics g = result.getGraphics(); | |
| 198 | + g.drawImage(image, 0, 0, null); | |
| 199 | + g.dispose(); | |
| 200 | + return result; | |
| 201 | + } | |
| 202 | + | |
| 203 | + /** | |
| 204 | + * 自适应二值化 - 根据局部区域动态调整阈值 | |
| 205 | + */ | |
| 206 | + private BufferedImage adaptiveBinarize(BufferedImage image) { | |
| 207 | + BufferedImage result = new BufferedImage( | |
| 208 | + image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY); | |
| 209 | + | |
| 210 | + int blockSize = 15; | |
| 211 | + int constant = 5; | |
| 212 | + | |
| 213 | + for (int y = 0; y < image.getHeight(); y++) { | |
| 214 | + for (int x = 0; x < image.getWidth(); x++) { | |
| 215 | + // 计算局部区域的平均值 | |
| 216 | + int sum = 0; | |
| 217 | + int count = 0; | |
| 218 | + for (int ky = -blockSize/2; ky <= blockSize/2; ky++) { | |
| 219 | + for (int kx = -blockSize/2; kx <= blockSize/2; kx++) { | |
| 220 | + int px = Math.min(Math.max(x + kx, 0), image.getWidth() - 1); | |
| 221 | + int py = Math.min(Math.max(y + ky, 0), image.getHeight() - 1); | |
| 222 | + sum += new Color(image.getRGB(px, py)).getRed(); | |
| 223 | + count++; | |
| 224 | + } | |
| 225 | + } | |
| 226 | + int threshold = sum / count - constant; | |
| 227 | + | |
| 228 | + // 应用阈值 | |
| 229 | + int gray = new Color(image.getRGB(x, y)).getRed(); | |
| 230 | + int binary = gray > threshold ? 255 : 0; | |
| 231 | + result.setRGB(x, y, new Color(binary, binary, binary).getRGB()); | |
| 232 | + } | |
| 233 | + } | |
| 234 | + return result; | |
| 235 | + } | |
| 236 | + | |
| 237 | + /** | |
| 238 | + * 降噪 - 优化的中值滤波 | |
| 239 | + */ | |
| 240 | + private BufferedImage denoise(BufferedImage image) { | |
| 241 | + BufferedImage result = new BufferedImage( | |
| 242 | + image.getWidth(), image.getHeight(), image.getType()); | |
| 243 | + | |
| 244 | + for (int y = 1; y < image.getHeight() - 1; y++) { | |
| 245 | + for (int x = 1; x < image.getWidth() - 1; x++) { | |
| 246 | + int[] neighbors = new int[9]; | |
| 247 | + int index = 0; | |
| 248 | + for (int ky = -1; ky <= 1; ky++) { | |
| 249 | + for (int kx = -1; kx <= 1; kx++) { | |
| 250 | + neighbors[index++] = new Color(image.getRGB(x + kx, y + ky)).getRed(); | |
| 251 | + } | |
| 252 | + } | |
| 253 | + Arrays.sort(neighbors); | |
| 254 | + int median = neighbors[4]; | |
| 255 | + result.setRGB(x, y, new Color(median, median, median).getRGB()); | |
| 256 | + } | |
| 257 | + } | |
| 258 | + | |
| 259 | + // 处理边缘 | |
| 260 | + for (int x = 0; x < image.getWidth(); x++) { | |
| 261 | + result.setRGB(x, 0, image.getRGB(x, 0)); | |
| 262 | + result.setRGB(x, image.getHeight() - 1, image.getRGB(x, image.getHeight() - 1)); | |
| 263 | + } | |
| 264 | + for (int y = 0; y < image.getHeight(); y++) { | |
| 265 | + result.setRGB(0, y, image.getRGB(0, y)); | |
| 266 | + result.setRGB(image.getWidth() - 1, y, image.getRGB(image.getWidth() - 1, y)); | |
| 267 | + } | |
| 268 | + | |
| 269 | + return result; | |
| 270 | + } | |
| 271 | + | |
| 272 | + /** | |
| 273 | + * 锐化处理 - 增强文字边缘 | |
| 274 | + */ | |
| 275 | + private BufferedImage sharpen(BufferedImage image) { | |
| 276 | + BufferedImage result = new BufferedImage( | |
| 277 | + image.getWidth(), image.getHeight(), image.getType()); | |
| 278 | + | |
| 279 | + // 拉普拉斯锐化核 | |
| 280 | + float[] sharpenKernel = { | |
| 281 | + 0, -1, 0, | |
| 282 | + -1, 5, -1, | |
| 283 | + 0, -1, 0 | |
| 284 | + }; | |
| 285 | + | |
| 286 | + for (int y = 1; y < image.getHeight() - 1; y++) { | |
| 287 | + for (int x = 1; x < image.getWidth() - 1; x++) { | |
| 288 | + int sum = 0; | |
| 289 | + int index = 0; | |
| 290 | + for (int ky = -1; ky <= 1; ky++) { | |
| 291 | + for (int kx = -1; kx <= 1; kx++) { | |
| 292 | + int gray = new Color(image.getRGB(x + kx, y + ky)).getRed(); | |
| 293 | + sum += gray * sharpenKernel[index++]; | |
| 294 | + } | |
| 295 | + } | |
| 296 | + sum = Math.min(255, Math.max(0, sum)); | |
| 297 | + result.setRGB(x, y, new Color(sum, sum, sum).getRGB()); | |
| 298 | + } | |
| 299 | + } | |
| 300 | + | |
| 301 | + return result; | |
| 302 | + } | |
| 303 | + | |
| 304 | + /** | |
| 305 | + * 放大图片(如果图片太小) | |
| 306 | + */ | |
| 307 | + private BufferedImage scaleImageIfNeeded(BufferedImage image) { | |
| 308 | + int width = image.getWidth(); | |
| 309 | + int height = image.getHeight(); | |
| 310 | + | |
| 311 | + if (width >= MIN_WIDTH && height >= MIN_HEIGHT) { | |
| 312 | + return image; | |
| 313 | + } | |
| 314 | + | |
| 315 | + double scaleX = (double) MIN_WIDTH / width; | |
| 316 | + double scaleY = (double) MIN_HEIGHT / height; | |
| 317 | + double scale = Math.max(scaleX, scaleY); | |
| 318 | + | |
| 319 | + int newWidth = (int) (width * scale); | |
| 320 | + int newHeight = (int) (height * scale); | |
| 321 | + | |
| 322 | + // 使用更好的插值算法 | |
| 323 | + BufferedImage result = new BufferedImage(newWidth, newHeight, image.getType()); | |
| 324 | + Graphics2D g2d = result.createGraphics(); | |
| 325 | + g2d.setRenderingHint(RenderingHints.KEY_INTERPOLATION, | |
| 326 | + RenderingHints.VALUE_INTERPOLATION_BICUBIC); | |
| 327 | + g2d.setRenderingHint(RenderingHints.KEY_RENDERING, | |
| 328 | + RenderingHints.VALUE_RENDER_QUALITY); | |
| 329 | + g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, | |
| 330 | + RenderingHints.VALUE_ANTIALIAS_ON); | |
| 331 | + g2d.drawImage(image, 0, 0, newWidth, newHeight, null); | |
| 332 | + g2d.dispose(); | |
| 333 | + | |
| 334 | + logger.debug("图片已放大: {}x{} -> {}x{}", width, height, newWidth, newHeight); | |
| 335 | + return result; | |
| 336 | + } | |
| 337 | + | |
| 338 | + /** | |
| 339 | + * 识别图片中的文字(增强版) | |
| 340 | + */ | |
| 341 | + public String extractText(File imageFile) { | |
| 342 | + if (imageFile == null || !imageFile.exists()) { | |
| 343 | + logger.error("图片文件不存在或为空"); | |
| 344 | + return "图片文件不存在"; | |
| 345 | + } | |
| 346 | + | |
| 347 | + OcrStats stats = new OcrStats(); | |
| 348 | + | |
| 349 | + try { | |
| 350 | + logger.info("开始识别图片: {}, 大小: {} bytes", | |
| 351 | + imageFile.getAbsolutePath(), imageFile.length()); | |
| 352 | + | |
| 353 | + // 读取原始图片 | |
| 354 | + long readStart = System.currentTimeMillis(); | |
| 355 | + BufferedImage originalImage = ImageIO.read(imageFile); | |
| 356 | + if (originalImage == null) { | |
| 357 | + return "无法读取图片文件,请确保图片格式正确"; | |
| 358 | + } | |
| 359 | + stats.imageSize = originalImage.getWidth() + "x" + originalImage.getHeight(); | |
| 360 | + | |
| 361 | + // 图片预处理 | |
| 362 | + long preprocessStart = System.currentTimeMillis(); | |
| 363 | + BufferedImage processedImage = preprocessImage(originalImage); | |
| 364 | + stats.preprocessTime = System.currentTimeMillis() - preprocessStart; | |
| 365 | + | |
| 366 | + // 可选:保存预处理图片用于调试(生产环境可注释) | |
| 367 | + if (logger.isDebugEnabled()) { | |
| 368 | + saveDebugImage(processedImage, imageFile); | |
| 369 | + } | |
| 370 | + | |
| 371 | + // 执行 OCR | |
| 372 | + long ocrStart = System.currentTimeMillis(); | |
| 373 | + String result = tesseract.doOCR(processedImage); | |
| 374 | + stats.ocrTime = System.currentTimeMillis() - ocrStart; | |
| 375 | + | |
| 376 | + logger.info("识别完成 - {}", stats); | |
| 377 | + | |
| 378 | + // 清理识别结果 | |
| 379 | + result = cleanResult(result); | |
| 380 | + | |
| 381 | + if (result.isEmpty()) { | |
| 382 | + logger.warn("识别结果为空,可能需要调整预处理参数"); | |
| 383 | + } | |
| 384 | + | |
| 385 | + return result; | |
| 386 | + | |
| 387 | + } catch (TesseractException e) { | |
| 388 | + logger.error("OCR识别失败: {}", e.getMessage(), e); | |
| 389 | + return "OCR识别失败: " + e.getMessage(); | |
| 390 | + } catch (IOException e) { | |
| 391 | + logger.error("读取图片失败: {}", e.getMessage(), e); | |
| 392 | + return "读取图片失败: " + e.getMessage(); | |
| 393 | + } | |
| 394 | + } | |
| 395 | + | |
| 396 | + /** | |
| 397 | + * 保存调试图片(仅用于调试) | |
| 398 | + */ | |
| 399 | + private void saveDebugImage(BufferedImage image, File originalFile) { | |
| 400 | + try { | |
| 401 | + String debugPath = originalFile.getParent() + "/debug_" + originalFile.getName(); | |
| 402 | + File debugFile = new File(debugPath); | |
| 403 | + ImageIO.write(image, "png", debugFile); | |
| 404 | + logger.debug("预处理图片已保存: {}", debugPath); | |
| 405 | + } catch (IOException e) { | |
| 406 | + logger.debug("保存调试图片失败: {}", e.getMessage()); | |
| 407 | + } | |
| 408 | + } | |
| 409 | + | |
| 410 | + /** | |
| 411 | + * 清理识别结果 | |
| 412 | + */ | |
| 413 | + private String cleanResult(String result) { | |
| 414 | + if (result == null || result.isEmpty()) { | |
| 415 | + return ""; | |
| 416 | + } | |
| 417 | + | |
| 418 | + // 去除首尾空白 | |
| 419 | + result = result.trim(); | |
| 420 | + | |
| 421 | + // 规范化换行符 | |
| 422 | + result = result.replaceAll("\\r\\n", "\n") | |
| 423 | + .replaceAll("\\r", "\n"); | |
| 424 | + | |
| 425 | + // 合并多个空行 | |
| 426 | + result = result.replaceAll("\n{3,}", "\n\n"); | |
| 427 | + | |
| 428 | + // 去除行首行尾空格 | |
| 429 | + String[] lines = result.split("\n"); | |
| 430 | + StringBuilder cleaned = new StringBuilder(); | |
| 431 | + for (String line : lines) { | |
| 432 | + cleaned.append(line.trim()).append("\n"); | |
| 433 | + } | |
| 434 | + | |
| 435 | + return cleaned.toString().trim(); | |
| 436 | + } | |
| 437 | + | |
| 438 | + /** | |
| 439 | + * 封装方法,接收上传的 MultipartFile | |
| 440 | + */ | |
| 441 | + public String extractTextFromMultipartFile(MultipartFile file) { | |
| 442 | + if (file == null || file.isEmpty()) { | |
| 443 | + logger.warn("上传的文件为空"); | |
| 444 | + return "上传的文件为空"; | |
| 445 | + } | |
| 446 | + | |
| 447 | + // 验证文件大小 | |
| 448 | + if (file.getSize() > MAX_FILE_SIZE) { | |
| 449 | + logger.warn("文件过大: {} bytes, 超过限制: {} bytes", | |
| 450 | + file.getSize(), MAX_FILE_SIZE); | |
| 451 | + return String.format("文件过大,最大支持 %dMB", MAX_FILE_SIZE / 1024 / 1024); | |
| 452 | + } | |
| 453 | + | |
| 454 | + // 验证文件格式 | |
| 455 | + String originalFilename = file.getOriginalFilename(); | |
| 456 | + if (originalFilename != null && !isAllowedImage(originalFilename)) { | |
| 457 | + logger.warn("不支持的文件格式: {}", originalFilename); | |
| 458 | + return "不支持的文件格式,仅支持: " + String.join(", ", ALLOWED_EXTENSIONS); | |
| 459 | + } | |
| 460 | + | |
| 461 | + Path tempFile = null; | |
| 462 | + try { | |
| 463 | + // 创建临时文件 | |
| 464 | + String suffix = getFileExtension(originalFilename); | |
| 465 | + tempFile = Files.createTempFile("ocr_", suffix); | |
| 466 | + file.transferTo(tempFile.toFile()); | |
| 467 | + | |
| 468 | + logger.info("临时文件创建成功: {}", tempFile); | |
| 469 | + | |
| 470 | + // 执行 OCR | |
| 471 | + String result = extractText(tempFile.toFile()); | |
| 472 | + | |
| 473 | + return result; | |
| 474 | + | |
| 475 | + } catch (IOException e) { | |
| 476 | + logger.error("文件处理失败: {}", e.getMessage(), e); | |
| 477 | + return "文件处理失败: " + e.getMessage(); | |
| 478 | + } finally { | |
| 479 | + // 清理临时文件 | |
| 480 | + cleanupTempFile(tempFile); | |
| 481 | + } | |
| 482 | + } | |
| 483 | + | |
| 484 | + /** | |
| 485 | + * 清理临时文件 | |
| 486 | + */ | |
| 487 | + private void cleanupTempFile(Path tempFile) { | |
| 488 | + if (tempFile != null) { | |
| 489 | + try { | |
| 490 | + Files.deleteIfExists(tempFile); | |
| 491 | + logger.debug("临时文件已删除: {}", tempFile); | |
| 492 | + } catch (IOException e) { | |
| 493 | + logger.warn("删除临时文件失败: {}", tempFile, e); | |
| 494 | + // 注册JVM退出时删除 | |
| 495 | + tempFile.toFile().deleteOnExit(); | |
| 496 | + } | |
| 497 | + } | |
| 498 | + } | |
| 499 | + | |
| 500 | + /** | |
| 501 | + * 批量识别(用于多张图片) | |
| 502 | + */ | |
| 503 | + public List<String> batchExtractText(List<MultipartFile> files) { | |
| 504 | + return files.stream() | |
| 505 | + .map(this::extractTextFromMultipartFile) | |
| 506 | + .collect(java.util.stream.Collectors.toList()); | |
| 507 | + } | |
| 508 | + | |
| 509 | + /** | |
| 510 | + * 检查文件扩展名是否允许 | |
| 511 | + */ | |
| 512 | + private boolean isAllowedImage(String filename) { | |
| 513 | + if (filename == null) { | |
| 514 | + return false; | |
| 515 | + } | |
| 516 | + String lowerFilename = filename.toLowerCase(); | |
| 517 | + return ALLOWED_EXTENSIONS.stream() | |
| 518 | + .anyMatch(lowerFilename::endsWith); | |
| 519 | + } | |
| 520 | + | |
| 521 | + /** | |
| 522 | + * 获取文件扩展名 | |
| 523 | + */ | |
| 524 | + private String getFileExtension(String filename) { | |
| 525 | + if (filename == null || !filename.contains(".")) { | |
| 526 | + return ".jpg"; | |
| 527 | + } | |
| 528 | + return filename.substring(filename.lastIndexOf(".")); | |
| 529 | + } | |
| 530 | +} | |
| 0 | 531 | \ No newline at end of file | ... | ... |
src/main/java/com/xly/ocr/test/Main.java
0 → 100644
| 1 | +package com.xly.ocr.test; | |
| 2 | + | |
| 3 | +import com.benjaminwan.ocrlibrary.OcrResult; | |
| 4 | +import com.benjaminwan.ocrlibrary.Point; | |
| 5 | +import com.benjaminwan.ocrlibrary.TextBlock; | |
| 6 | +import io.github.mymonstercat.Model; | |
| 7 | +import io.github.mymonstercat.ocr.InferenceEngine; | |
| 8 | +import io.github.mymonstercat.ocr.config.ParamConfig; | |
| 9 | + | |
| 10 | +import javax.imageio.ImageIO; | |
| 11 | +import java.awt.*; | |
| 12 | +import java.awt.image.BufferedImage; | |
| 13 | +import java.awt.image.Kernel; | |
| 14 | +import java.io.File; | |
| 15 | +import java.io.IOException; | |
| 16 | +import java.util.ArrayList; | |
| 17 | +import java.util.List; | |
| 18 | + | |
| 19 | +public class Main { | |
| 20 | + | |
| 21 | + static { | |
| 22 | + try { | |
| 23 | + String customTempDir = "D:/temp/ocrJava"; | |
| 24 | + File tempDir = new File(customTempDir); | |
| 25 | + if (!tempDir.exists()) { | |
| 26 | + tempDir.mkdirs(); | |
| 27 | + } | |
| 28 | + System.setProperty("java.io.tmpdir", customTempDir); | |
| 29 | + System.setProperty("TMP", customTempDir); | |
| 30 | + System.setProperty("TEMP", customTempDir); | |
| 31 | + System.setProperty("ORT_TMP_DIR", customTempDir); | |
| 32 | + | |
| 33 | + System.out.println("=================================="); | |
| 34 | + System.out.println("java.io.tmpdir: " + System.getProperty("java.io.tmpdir")); | |
| 35 | + System.out.println("临时目录是否存在: " + tempDir.exists()); | |
| 36 | + System.out.println("=================================="); | |
| 37 | + | |
| 38 | + } catch (Exception e) { | |
| 39 | + System.err.println("设置临时目录失败: " + e.getMessage()); | |
| 40 | + e.printStackTrace(); | |
| 41 | + } | |
| 42 | + } | |
| 43 | + | |
| 44 | + public static void main(String[] args) { | |
| 45 | + try { | |
| 46 | + System.out.println("OCR 程序开始执行..."); | |
| 47 | + | |
| 48 | + // 检查旧的临时目录 | |
| 49 | + String oldTempPath = "C:\\Users\\钱豹\\AppData\\Local\\Temp\\ocrJava"; | |
| 50 | + File oldTempDir = new File(oldTempPath); | |
| 51 | + if (oldTempDir.exists()) { | |
| 52 | + System.out.println("发现旧的临时目录: " + oldTempPath); | |
| 53 | + } | |
| 54 | + | |
| 55 | + // 1. 初始化引擎(V4 模型) | |
| 56 | + System.out.println("正在初始化 OCR 引擎..."); | |
| 57 | + InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4); | |
| 58 | + | |
| 59 | + // 2. 创建优化的参数配置 | |
| 60 | + ParamConfig config = createOptimizedParamConfig(); | |
| 61 | + | |
| 62 | + // 3. 设置图片路径 | |
| 63 | + String imagePath = "E:/aa/b.jpg"; | |
| 64 | + | |
| 65 | + File imageFile = new File(imagePath); | |
| 66 | + if (!imageFile.exists()) { | |
| 67 | + System.err.println("图片文件不存在: " + imagePath); | |
| 68 | + return; | |
| 69 | + } | |
| 70 | + | |
| 71 | + // 4. 图像预处理(可选,注释掉可提高速度) | |
| 72 | + System.out.println("正在进行图像预处理..."); | |
| 73 | + String processedImagePath = preprocessImage(imagePath); | |
| 74 | + | |
| 75 | + // 5. 执行识别 | |
| 76 | + System.out.println("开始识别图片: " + processedImagePath); | |
| 77 | + long startTime = System.currentTimeMillis(); | |
| 78 | + OcrResult ocrResult = engine.runOcr(processedImagePath, config); | |
| 79 | + long endTime = System.currentTimeMillis(); | |
| 80 | + | |
| 81 | + // 6. 输出识别结果 | |
| 82 | + String text = ocrResult.getStrRes().trim(); | |
| 83 | + System.out.println("=================================="); | |
| 84 | + System.out.println("识别结果:"); | |
| 85 | + System.out.println(text); | |
| 86 | + System.out.println("=================================="); | |
| 87 | + System.out.println("识别耗时: " + (endTime - startTime) + " ms"); | |
| 88 | + | |
| 89 | + // 7. 输出文本块详细信息(可选,用于调试) | |
| 90 | + if (ocrResult.getTextBlocks() != null && !ocrResult.getTextBlocks().isEmpty()) { | |
| 91 | + System.out.println("\n文本块详情(共" + ocrResult.getTextBlocks().size() + "块):"); | |
| 92 | + List<TextBlock> textBlocks = ocrResult.getTextBlocks(); | |
| 93 | + for (int i = 0; i < textBlocks.size(); i++) { | |
| 94 | + TextBlock block = textBlocks.get(i); | |
| 95 | + System.out.printf(" 块%d: %s (置信度: %.2f)%n", | |
| 96 | + i + 1, | |
| 97 | + block.getText(), | |
| 98 | + block.getBoxScore() | |
| 99 | + ); | |
| 100 | + } | |
| 101 | + } | |
| 102 | + | |
| 103 | + // 8. 清理临时文件 | |
| 104 | + if (!processedImagePath.equals(imagePath)) { | |
| 105 | + File processedFile = new File(processedImagePath); | |
| 106 | + if (processedFile.exists()) { | |
| 107 | + processedFile.delete(); | |
| 108 | + System.out.println("\n已清理临时文件: " + processedImagePath); | |
| 109 | + } | |
| 110 | + } | |
| 111 | + | |
| 112 | + } catch (Exception e) { | |
| 113 | + System.err.println("OCR 识别失败: " + e.getMessage()); | |
| 114 | + e.printStackTrace(); | |
| 115 | + } | |
| 116 | + } | |
| 117 | + | |
| 118 | + private static ParamConfig createOptimizedParamConfig() { | |
| 119 | + ParamConfig config = new ParamConfig(); | |
| 120 | + | |
| 121 | + // 手写体专用超强参数 | |
| 122 | + config.setPadding(100); | |
| 123 | + config.setMaxSideLen(1200); | |
| 124 | + | |
| 125 | + // 极低阈值 = 不漏检任何手写文字 | |
| 126 | + config.setBoxScoreThresh(0.25f); | |
| 127 | + config.setBoxThresh(0.15f); | |
| 128 | + config.setUnClipRatio(2.5f); | |
| 129 | + | |
| 130 | + // 必须开启角度矫正 | |
| 131 | + config.setDoAngle(true); | |
| 132 | + config.setMostAngle(true); | |
| 133 | + | |
| 134 | + return config; | |
| 135 | + } | |
| 136 | + | |
| 137 | + /** | |
| 138 | + * 图像预处理:提高OCR识别准确度 | |
| 139 | + */ | |
| 140 | + private static String preprocessImage(String imagePath) throws IOException { | |
| 141 | + File inputFile = new File(imagePath); | |
| 142 | + BufferedImage originalImage = ImageIO.read(inputFile); | |
| 143 | + | |
| 144 | + if (originalImage == null) return imagePath; | |
| 145 | + | |
| 146 | + // 固定缩放到最佳尺寸 | |
| 147 | + BufferedImage resized = resizeImageWithQuality(originalImage, 1000, 1500); | |
| 148 | + | |
| 149 | + // 手写体必须:二值化(黑白强化)+ 锐化 | |
| 150 | + BufferedImage binary = toBinary(resized); // 黑白强化,核心! | |
| 151 | + BufferedImage sharp = sharpenImage(binary); // 锐化 | |
| 152 | + | |
| 153 | + // 保存 | |
| 154 | + String outputPath = imagePath.substring(0, imagePath.lastIndexOf(".")) + "_final.jpg"; | |
| 155 | + ImageIO.write(sharp, "jpg", new File(outputPath)); | |
| 156 | + return outputPath; | |
| 157 | + } | |
| 158 | + | |
| 159 | + // 手写体专用:黑白二值化,文字瞬间清晰 | |
| 160 | + private static BufferedImage toBinary(BufferedImage image) { | |
| 161 | + BufferedImage gray = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY); | |
| 162 | + Graphics g = gray.getGraphics(); | |
| 163 | + g.drawImage(image, 0, 0, null); | |
| 164 | + g.dispose(); | |
| 165 | + | |
| 166 | + // 手写体阈值 120,文字最清晰 | |
| 167 | + int threshold = 120; | |
| 168 | + for (int y = 0; y < gray.getHeight(); y++) { | |
| 169 | + for (int x = 0; x < gray.getWidth(); x++) { | |
| 170 | + int rgb = gray.getRGB(x, y); | |
| 171 | + int grayValue = (rgb >> 16) & 0xFF; | |
| 172 | + if (grayValue < threshold) { | |
| 173 | + gray.setRGB(x, y, Color.BLACK.getRGB()); | |
| 174 | + } else { | |
| 175 | + gray.setRGB(x, y, Color.WHITE.getRGB()); | |
| 176 | + } | |
| 177 | + } | |
| 178 | + } | |
| 179 | + return gray; | |
| 180 | + } | |
| 181 | + | |
| 182 | + /** | |
| 183 | + * 高质量调整图片大小 | |
| 184 | + */ | |
| 185 | + private static BufferedImage resizeImageWithQuality(BufferedImage original, int maxWidth, int maxHeight) { | |
| 186 | + int width = original.getWidth(); | |
| 187 | + int height = original.getHeight(); | |
| 188 | + | |
| 189 | + // 如果图片尺寸合适,不进行调整 | |
| 190 | + if (width <= maxWidth && height <= maxHeight) { | |
| 191 | + return original; | |
| 192 | + } | |
| 193 | + | |
| 194 | + // 计算缩放比例 | |
| 195 | + double ratio = Math.min((double) maxWidth / width, (double) maxHeight / height); | |
| 196 | + int newWidth = (int) (width * ratio); | |
| 197 | + int newHeight = (int) (height * ratio); | |
| 198 | + | |
| 199 | + // 创建缩放后的图片 | |
| 200 | + BufferedImage resized = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB); | |
| 201 | + Graphics2D g = resized.createGraphics(); | |
| 202 | + g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC); | |
| 203 | + g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY); | |
| 204 | + g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON); | |
| 205 | + g.drawImage(original, 0, 0, newWidth, newHeight, null); | |
| 206 | + g.dispose(); | |
| 207 | + | |
| 208 | + System.out.println("图片已缩放: " + width + "x" + height + " -> " + newWidth + "x" + newHeight); | |
| 209 | + return resized; | |
| 210 | + } | |
| 211 | + | |
| 212 | + /** | |
| 213 | + * 增强对比度 | |
| 214 | + */ | |
| 215 | + private static BufferedImage enhanceContrast(BufferedImage image) { | |
| 216 | + BufferedImage result = new BufferedImage(image.getWidth(), image.getHeight(), image.getType()); | |
| 217 | + | |
| 218 | + for (int y = 0; y < image.getHeight(); y++) { | |
| 219 | + for (int x = 0; x < image.getWidth(); x++) { | |
| 220 | + Color color = new Color(image.getRGB(x, y)); | |
| 221 | + | |
| 222 | + // 增强对比度 | |
| 223 | + int red = (int) (color.getRed() * 1.2); | |
| 224 | + int green = (int) (color.getGreen() * 1.2); | |
| 225 | + int blue = (int) (color.getBlue() * 1.2); | |
| 226 | + | |
| 227 | + // 限制RGB值范围 | |
| 228 | + red = Math.min(255, Math.max(0, red)); | |
| 229 | + green = Math.min(255, Math.max(0, green)); | |
| 230 | + blue = Math.min(255, Math.max(0, blue)); | |
| 231 | + | |
| 232 | + result.setRGB(x, y, new Color(red, green, blue).getRGB()); | |
| 233 | + } | |
| 234 | + } | |
| 235 | + | |
| 236 | + System.out.println("对比度已增强"); | |
| 237 | + return result; | |
| 238 | + } | |
| 239 | + | |
| 240 | + /** | |
| 241 | + * 锐化图像(可选,使文字边缘更清晰) | |
| 242 | + */ | |
| 243 | + private static BufferedImage sharpenImage(BufferedImage image) { | |
| 244 | + BufferedImage result = new BufferedImage(image.getWidth(), image.getHeight(), image.getType()); | |
| 245 | + | |
| 246 | + float[] sharpenKernel = { | |
| 247 | + 0, -1, 0, | |
| 248 | + -1, 5, -1, | |
| 249 | + 0, -1, 0 | |
| 250 | + }; | |
| 251 | + | |
| 252 | + Kernel kernel = new Kernel(3, 3, sharpenKernel); | |
| 253 | + java.awt.image.ConvolveOp op = new java.awt.image.ConvolveOp(kernel, java.awt.image.ConvolveOp.EDGE_NO_OP, null); | |
| 254 | + op.filter(image, result); | |
| 255 | + | |
| 256 | + System.out.println("图像已锐化"); | |
| 257 | + return result; | |
| 258 | + } | |
| 259 | +} | |
| 0 | 260 | \ No newline at end of file | ... | ... |
src/main/java/com/xly/ocr/test/Test.java
0 → 100644
| 1 | +package com.xly.ocr.test; | |
| 2 | + | |
| 3 | +import com.benjaminwan.ocrlibrary.OcrResult; | |
| 4 | +import com.benjaminwan.ocrlibrary.TextBlock; | |
| 5 | +import io.github.mymonstercat.Model; | |
| 6 | +import io.github.mymonstercat.ocr.InferenceEngine; | |
| 7 | +import io.github.mymonstercat.ocr.config.ParamConfig; | |
| 8 | + | |
| 9 | +import javax.imageio.ImageIO; | |
| 10 | +import java.awt.*; | |
| 11 | +import java.awt.image.BufferedImage; | |
| 12 | +import java.io.File; | |
| 13 | +import java.io.IOException; | |
| 14 | +import java.util.List; | |
| 15 | + | |
| 16 | +public class Test { | |
| 17 | + | |
| 18 | + static { | |
| 19 | + try { | |
| 20 | + String customTempDir = "D:/temp/ocrJava"; | |
| 21 | + File tempDir = new File(customTempDir); | |
| 22 | + if (!tempDir.exists()) { | |
| 23 | + tempDir.mkdirs(); | |
| 24 | + } | |
| 25 | + System.setProperty("java.io.tmpdir", customTempDir); | |
| 26 | + System.setProperty("TMP", customTempDir); | |
| 27 | + System.setProperty("TEMP", customTempDir); | |
| 28 | + | |
| 29 | + System.out.println("=================================="); | |
| 30 | + System.out.println("临时目录: " + System.getProperty("java.io.tmpdir")); | |
| 31 | + System.out.println("=================================="); | |
| 32 | + | |
| 33 | + } catch (Exception e) { | |
| 34 | + System.err.println("设置临时目录失败: " + e.getMessage()); | |
| 35 | + } | |
| 36 | + } | |
| 37 | + | |
| 38 | + public static void main(String[] args) { | |
| 39 | + try { | |
| 40 | + System.out.println("OCR 程序开始执行..."); | |
| 41 | + | |
| 42 | + // 1. 初始化引擎(使用 v4 模型) | |
| 43 | + System.out.println("正在初始化 OCR 引擎 (PP-OCRv4)..."); | |
| 44 | + InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4); | |
| 45 | + | |
| 46 | + // 2. 创建优化的参数配置 | |
| 47 | + ParamConfig config = createOptimizedParamConfig(); | |
| 48 | + | |
| 49 | + // 3. 图片路径 | |
| 50 | + String imagePath = "E:/aa/b.jpg"; | |
| 51 | + File imageFile = new File(imagePath); | |
| 52 | + if (!imageFile.exists()) { | |
| 53 | + System.err.println("图片文件不存在: " + imagePath); | |
| 54 | + return; | |
| 55 | + } | |
| 56 | + | |
| 57 | + // 4. 图像预处理(直接处理原图,不保存临时文件) | |
| 58 | + System.out.println("正在进行图像预处理..."); | |
| 59 | + BufferedImage processedImage = preprocessImage(imageFile); | |
| 60 | + | |
| 61 | + // 5. 保存预处理后的图片到临时目录 | |
| 62 | + String processedImagePath = "D:/temp/ocrJava/processed_" + System.currentTimeMillis() + ".png"; | |
| 63 | + ImageIO.write(processedImage, "png", new File(processedImagePath)); | |
| 64 | + System.out.println("预处理图片已保存: " + processedImagePath); | |
| 65 | + | |
| 66 | + // 6. 执行识别 | |
| 67 | + System.out.println("开始识别图片..."); | |
| 68 | + long startTime = System.currentTimeMillis(); | |
| 69 | + OcrResult ocrResult = engine.runOcr(processedImagePath, config); | |
| 70 | + long endTime = System.currentTimeMillis(); | |
| 71 | + | |
| 72 | + // 7. 输出结果 | |
| 73 | + String text = ocrResult.getStrRes().trim(); | |
| 74 | + System.out.println("\n=================================="); | |
| 75 | + System.out.println("识别结果:"); | |
| 76 | + System.out.println(text); | |
| 77 | + System.out.println("=================================="); | |
| 78 | + System.out.println("识别耗时: " + (endTime - startTime) + " ms"); | |
| 79 | + | |
| 80 | + // 8. 输出每个文本块 | |
| 81 | + if (ocrResult.getTextBlocks() != null && !ocrResult.getTextBlocks().isEmpty()) { | |
| 82 | + System.out.println("\n文本块详情(共" + ocrResult.getTextBlocks().size() + "块):"); | |
| 83 | + List<TextBlock> textBlocks = ocrResult.getTextBlocks(); | |
| 84 | + for (int i = 0; i < textBlocks.size(); i++) { | |
| 85 | + TextBlock block = textBlocks.get(i); | |
| 86 | + System.out.printf(" 块%d: %s (置信度: %.2f)%n", | |
| 87 | + i + 1, | |
| 88 | + block.getText(), | |
| 89 | + block.getBoxScore() | |
| 90 | + ); | |
| 91 | + } | |
| 92 | + } | |
| 93 | + | |
| 94 | + // 9. 清理临时文件 | |
| 95 | + new File(processedImagePath).delete(); | |
| 96 | + | |
| 97 | + } catch (Exception e) { | |
| 98 | + System.err.println("OCR 识别失败: " + e.getMessage()); | |
| 99 | + e.printStackTrace(); | |
| 100 | + } | |
| 101 | + } | |
| 102 | + | |
| 103 | + /** | |
| 104 | + * 优化的参数配置 | |
| 105 | + */ | |
| 106 | + private static ParamConfig createOptimizedParamConfig() { | |
| 107 | + ParamConfig config = new ParamConfig(); | |
| 108 | + | |
| 109 | + config.setPadding(50); | |
| 110 | + config.setMaxSideLen(0); | |
| 111 | + config.setBoxScoreThresh(0.4f); | |
| 112 | + config.setBoxThresh(0.25f); | |
| 113 | + config.setUnClipRatio(1.8f); | |
| 114 | + config.setDoAngle(true); | |
| 115 | + config.setMostAngle(true); | |
| 116 | + | |
| 117 | + return config; | |
| 118 | + } | |
| 119 | + | |
| 120 | + /** | |
| 121 | + * 图像预处理 - 直接返回处理后的 BufferedImage | |
| 122 | + */ | |
| 123 | + private static BufferedImage preprocessImage(File imageFile) throws IOException { | |
| 124 | + BufferedImage original = ImageIO.read(imageFile); | |
| 125 | + if (original == null) { | |
| 126 | + throw new IOException("无法读取图片: " + imageFile.getPath()); | |
| 127 | + } | |
| 128 | + | |
| 129 | + System.out.println("原始尺寸: " + original.getWidth() + "x" + original.getHeight()); | |
| 130 | + | |
| 131 | + BufferedImage processed = original; | |
| 132 | + | |
| 133 | + // 1. 如果图片太大,缩小尺寸 | |
| 134 | + if (processed.getWidth() > 2000 || processed.getHeight() > 2000) { | |
| 135 | + processed = resizeImage(processed, 1600, 1600); | |
| 136 | + } | |
| 137 | + | |
| 138 | + // 2. 增强对比度 | |
| 139 | + processed = enhanceContrast(processed); | |
| 140 | + | |
| 141 | + System.out.println("处理后尺寸: " + processed.getWidth() + "x" + processed.getHeight()); | |
| 142 | + | |
| 143 | + return processed; | |
| 144 | + } | |
| 145 | + | |
| 146 | + /** | |
| 147 | + * 调整图片大小 | |
| 148 | + */ | |
| 149 | + private static BufferedImage resizeImage(BufferedImage image, int maxWidth, int maxHeight) { | |
| 150 | + int w = image.getWidth(); | |
| 151 | + int h = image.getHeight(); | |
| 152 | + double ratio = Math.min((double) maxWidth / w, (double) maxHeight / h); | |
| 153 | + if (ratio >= 1) return image; | |
| 154 | + | |
| 155 | + int newW = (int) (w * ratio); | |
| 156 | + int newH = (int) (h * ratio); | |
| 157 | + | |
| 158 | + BufferedImage resized = new BufferedImage(newW, newH, BufferedImage.TYPE_INT_RGB); | |
| 159 | + Graphics2D g = resized.createGraphics(); | |
| 160 | + g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC); | |
| 161 | + g.drawImage(image, 0, 0, newW, newH, null); | |
| 162 | + g.dispose(); | |
| 163 | + return resized; | |
| 164 | + } | |
| 165 | + | |
| 166 | + /** | |
| 167 | + * 增强对比度 | |
| 168 | + */ | |
| 169 | + private static BufferedImage enhanceContrast(BufferedImage image) { | |
| 170 | + BufferedImage result = new BufferedImage(image.getWidth(), image.getHeight(), image.getType()); | |
| 171 | + for (int y = 0; y < image.getHeight(); y++) { | |
| 172 | + for (int x = 0; x < image.getWidth(); x++) { | |
| 173 | + Color c = new Color(image.getRGB(x, y)); | |
| 174 | + int r = Math.min(255, (int) (c.getRed() * 1.15)); | |
| 175 | + int g = Math.min(255, (int) (c.getGreen() * 1.15)); | |
| 176 | + int b = Math.min(255, (int) (c.getBlue() * 1.15)); | |
| 177 | + result.setRGB(x, y, new Color(r, g, b).getRGB()); | |
| 178 | + } | |
| 179 | + } | |
| 180 | + return result; | |
| 181 | + } | |
| 182 | +} | |
| 0 | 183 | \ No newline at end of file | ... | ... |
src/main/java/com/xly/ocr/web/OcrController.java
0 → 100644
| 1 | +//package com.xly.ocr.web; | |
| 2 | +// | |
| 3 | +//import com.xly.ocr.service.OcrService; | |
| 4 | +//import org.springframework.beans.factory.annotation.Autowired; | |
| 5 | +//import org.springframework.http.ResponseEntity; | |
| 6 | +//import org.springframework.web.bind.annotation.*; | |
| 7 | +//import org.springframework.web.multipart.MultipartFile; | |
| 8 | +// | |
| 9 | +//import java.util.HashMap; | |
| 10 | +//import java.util.List; | |
| 11 | +//import java.util.Map; | |
| 12 | +// | |
| 13 | +//@RestController | |
| 14 | +//@RequestMapping("/api/ocr") | |
| 15 | +//public class OcrController { | |
| 16 | +// | |
| 17 | +// @Autowired | |
| 18 | +// private OcrService ocrService; | |
| 19 | +// | |
| 20 | +// @PostMapping("/extract") | |
| 21 | +// public ResponseEntity<Map<String, Object>> extractText( | |
| 22 | +// @RequestParam("file") MultipartFile file) { | |
| 23 | +// | |
| 24 | +// Map<String, Object> response = new HashMap<>(); | |
| 25 | +// long startTime = System.currentTimeMillis(); | |
| 26 | +// | |
| 27 | +// String result = ocrService.extractTextFromMultipartFile(file); | |
| 28 | +// | |
| 29 | +// response.put("text", result); | |
| 30 | +// response.put("time", System.currentTimeMillis() - startTime); | |
| 31 | +// response.put("success", !result.startsWith("错误") && !result.startsWith("失败")); | |
| 32 | +// | |
| 33 | +// return ResponseEntity.ok(response); | |
| 34 | +// } | |
| 35 | +// | |
| 36 | +// @PostMapping("/batch") | |
| 37 | +// public ResponseEntity<List<String>> batchExtract( | |
| 38 | +// @RequestParam("files") List<MultipartFile> files) { | |
| 39 | +// List<String> results = ocrService.batchExtractText(files); | |
| 40 | +// return ResponseEntity.ok(results); | |
| 41 | +// } | |
| 42 | +//} | |
| 0 | 43 | \ No newline at end of file | ... | ... |
src/main/java/com/xly/service/XlyErpService.java
| ... | ... | @@ -161,13 +161,29 @@ public class XlyErpService { |
| 161 | 161 | && ObjectUtil.isNotEmpty(session.getCurrentTool().getSInputTabelName()) |
| 162 | 162 | && ObjectUtil.isNotEmpty(session.getCurrentTool().getSStructureMemo())) |
| 163 | 163 | ){ |
| 164 | - //查询是否走向量库 还是数据库查询 | |
| 165 | - Boolean isAggregation = aiAgent.routeQuery(session.getUserId(), input); | |
| 164 | + //查询缓存是否存在取缓存 直接走 | |
| 165 | + Map<String,Object> cachMap = getDynamicTableCach(session,userInput); | |
| 166 | + Boolean isAggregation; | |
| 167 | + Boolean bHasCach; | |
| 168 | + String sCleanSql = StrUtil.EMPTY; | |
| 169 | + if(ObjectUtil.isEmpty(cachMap)){ | |
| 170 | + //查询是否走向量库 还是数据库查询 | |
| 171 | + isAggregation = aiAgent.routeQuery(session.getUserId(), input); | |
| 172 | + session.setDbCach("D"); | |
| 173 | + bHasCach = false; | |
| 174 | + }else{ | |
| 175 | + isAggregation = "MYSQL".equals(cachMap.get("cachType")); | |
| 176 | + session.setDbCach("H"); | |
| 177 | + bHasCach = true; | |
| 178 | + sCleanSql = ObjectUtil.isNotEmpty(cachMap.get("sSqlContent"))?cachMap.get("sSqlContent").toString() : StrUtil.EMPTY; | |
| 179 | + } | |
| 166 | 180 | if(!isAggregation){ |
| 167 | 181 | //获取常量库内容 |
| 168 | - sResponMessage = getMilvus(session, input, aiAgent); | |
| 182 | + session.setDbType("X"); | |
| 183 | + sResponMessage = getMilvus(session, input, aiAgent,bHasCach); | |
| 169 | 184 | }else { |
| 170 | - sResponMessage = getDynamicTableSql(session, input, userId, userInput,0,StrUtil.EMPTY,StrUtil.EMPTY,"0",StrUtil.EMPTY, aiAgent); | |
| 185 | + session.setDbType("G"); | |
| 186 | + sResponMessage = getDynamicTableSql(session, input, userId, userInput,0,StrUtil.EMPTY,StrUtil.EMPTY,"0",StrUtil.EMPTY, aiAgent,sCleanSql); | |
| 171 | 187 | } |
| 172 | 188 | return Flux.just(AiResponseDTO.builder() |
| 173 | 189 | .aiText(sResponMessage) |
| ... | ... | @@ -235,8 +251,10 @@ public class XlyErpService { |
| 235 | 251 | session = userSceneSessionService.getUserSceneSession(userId,sUserName,sBrandsId,sSubsidiaryId,sUserType,authorization); |
| 236 | 252 | session.setAuthorization(authorization); |
| 237 | 253 | session.setSFunPrompts(null); |
| 254 | + session.setDbCach(StrUtil.EMPTY); | |
| 255 | + session.setDbType(StrUtil.EMPTY); | |
| 256 | + | |
| 238 | 257 | sceneName = ObjectUtil.isNotEmpty(session.getCurrentScene())?session.getCurrentScene().getSSceneName():StrUtil.EMPTY; |
| 239 | - methodName = ObjectUtil.isNotEmpty(session.getCurrentTool())?session.getCurrentTool().getSMethodName():StrUtil.EMPTY; | |
| 240 | 258 | // 2. 特殊指令:重置场景(无论是否已选,都可重置) |
| 241 | 259 | if (input.contains("重置") || input.contains("重新选择")) { |
| 242 | 260 | //清除记忆缓存 |
| ... | ... | @@ -268,7 +286,7 @@ public class XlyErpService { |
| 268 | 286 | ){ |
| 269 | 287 | sResponMessage = aiAgent.chat(userId, input); |
| 270 | 288 | } |
| 271 | - | |
| 289 | + methodName = ObjectUtil.isNotEmpty(session.getCurrentTool())?session.getCurrentTool().getSMethodName():StrUtil.EMPTY; | |
| 272 | 290 | if(ObjectUtil.isNotEmpty(session.getCurrentTool()) |
| 273 | 291 | && !ObjectUtil.isNotEmpty(session.getCurrentTool().getSInputTabelName()) |
| 274 | 292 | ){ |
| ... | ... | @@ -283,28 +301,44 @@ public class XlyErpService { |
| 283 | 301 | && ObjectUtil.isNotEmpty(session.getCurrentTool().getSInputTabelName()) |
| 284 | 302 | && ObjectUtil.isNotEmpty(session.getCurrentTool().getSStructureMemo())) |
| 285 | 303 | ){ |
| 286 | - //查询是否走向量库 还是数据库查询 | |
| 287 | - Boolean isAggregation = aiAgent.routeQuery(session.getUserId(), input); | |
| 304 | + //查询缓存是否存在取缓存 直接走 | |
| 305 | + Map<String,Object> cachMap = getDynamicTableCach(session,userInput); | |
| 306 | + Boolean isAggregation; | |
| 307 | + Boolean bHasCach; | |
| 308 | + String sCleanSql = StrUtil.EMPTY; | |
| 309 | + if(ObjectUtil.isEmpty(cachMap)){ | |
| 310 | + //查询是否走向量库 还是数据库查询 | |
| 311 | + isAggregation = aiAgent.routeQuery(session.getUserId(), input); | |
| 312 | + session.setDbCach("D"); | |
| 313 | + bHasCach = false; | |
| 314 | + }else{ | |
| 315 | + isAggregation = "MYSQL".equals(cachMap.get("cachType")); | |
| 316 | + session.setDbCach("H"); | |
| 317 | + bHasCach = true; | |
| 318 | + sCleanSql = ObjectUtil.isNotEmpty(cachMap.get("sSqlContent"))?cachMap.get("sSqlContent").toString() : StrUtil.EMPTY; | |
| 319 | + } | |
| 288 | 320 | if(!isAggregation){ |
| 289 | 321 | //获取常量库内容 |
| 290 | - sResponMessage = getMilvus(session, input, aiAgent); | |
| 322 | + session.setDbType("X"); | |
| 323 | + sResponMessage = getMilvus(session, input, aiAgent,bHasCach); | |
| 291 | 324 | }else { |
| 292 | - sResponMessage = getDynamicTableSql(session, input, userId, userInput,0,StrUtil.EMPTY,StrUtil.EMPTY,"0",StrUtil.EMPTY, aiAgent); | |
| 325 | + session.setDbType("G"); | |
| 326 | + sResponMessage = getDynamicTableSql(session, input, userId, userInput,0,StrUtil.EMPTY,StrUtil.EMPTY,"0",StrUtil.EMPTY, aiAgent,sCleanSql); | |
| 293 | 327 | } |
| 294 | - return AiResponseDTO.builder().sSceneName(sceneName).sMethodName(methodName).aiText(sResponMessage).sReturnType(ReturnTypeCode.HTML.getCode()).build(); | |
| 328 | + return AiResponseDTO.builder().sSceneName(sceneName).sMethodName(methodName).aiText(sResponMessage).sReturnType(ReturnTypeCode.HTML.getCode()).dbType(session.getDbType()).dbCach(session.getDbCach()).build(); | |
| 295 | 329 | } else if (ObjectUtil.isNotEmpty(session.getCurrentTool())) { |
| 296 | 330 | //2.处理工具参数采集结束后业务逻辑处理 |
| 297 | 331 | //调用方法,参数缺失部分提示,就直接使用方法返回的 |
| 298 | 332 | sResponMessage = dynamicToolProvider.doDynamicTool(session.getCurrentTool(),session); |
| 299 | - return AiResponseDTO.builder().sSceneName(sceneName).sMethodName(methodName).aiText(sResponMessage).sReturnType(ReturnTypeCode.HTML.getCode()).build(); | |
| 333 | + return AiResponseDTO.builder().sSceneName(sceneName).sMethodName(methodName).aiText(sResponMessage).dbType(session.getDbType()).dbCach(session.getDbCach()).sReturnType(ReturnTypeCode.HTML.getCode()).build(); | |
| 300 | 334 | }else if(session.getCurrentScene()== null ){ |
| 301 | - return AiResponseDTO.builder().sSceneName(sceneName).sMethodName(methodName).aiText("当前场景:没有选择 退回当前场景 请输入 "+ CommonConstant.RESET + sResponMessage).sReturnType(ReturnTypeCode.HTML.getCode()).build(); | |
| 335 | + return AiResponseDTO.builder().sSceneName(sceneName).sMethodName(methodName).aiText("当前场景:没有选择 退回当前场景 请输入 "+ CommonConstant.RESET + sResponMessage).dbType(session.getDbType()).dbCach(session.getDbCach()).sReturnType(ReturnTypeCode.HTML.getCode()).build(); | |
| 302 | 336 | }else{ |
| 303 | 337 | return getChatiAgent (input, session); |
| 304 | 338 | } |
| 305 | 339 | } catch (Exception e) { |
| 306 | 340 | e.printStackTrace(); |
| 307 | - return AiResponseDTO.builder().sSceneName(sceneName).sMethodName(methodName).aiText("系统异常:" + e.getMessage() + ",请稍后重试!").sReturnType(ReturnTypeCode.HTML.getCode()).build(); | |
| 341 | + return AiResponseDTO.builder().sSceneName(sceneName).sMethodName(methodName).aiText("系统异常:" + e.getMessage() + ",请稍后重试!").dbType(session.getDbType()).dbCach(session.getDbCach()).sReturnType(ReturnTypeCode.HTML.getCode()).build(); | |
| 308 | 342 | }finally { |
| 309 | 343 | //5.执行工具方法后,清除记忆 |
| 310 | 344 | if(session !=null && session.getBCleanMemory()){ |
| ... | ... | @@ -363,23 +397,56 @@ public class XlyErpService { |
| 363 | 397 | * @return java.lang.String |
| 364 | 398 | * @Description 查询向量库 |
| 365 | 399 | **/ |
| 366 | - private String getMilvus(UserSceneSession session,String userInput,ErpAiAgent aiAgent){ | |
| 400 | + private String getMilvus(UserSceneSession session,String userInput,ErpAiAgent aiAgent,Boolean bCach){ | |
| 367 | 401 | String resultExplain = "信息模糊,请提供更具体的问题或指令"; |
| 368 | 402 | try{ |
| 403 | + addSessionUserQuestionList(session, userInput); | |
| 369 | 404 | String sVectorfiled = session.getCurrentTool().getSVectorfiled(); |
| 370 | 405 | String sInputTabelName = session.getCurrentTool().getSInputTabelName(); |
| 371 | 406 | String sVectorfiledAll = session.getCurrentTool().getSVectorfiledAll(); |
| 372 | - Map<String,Object> rMap = milvusService.getMilvusFiled(sVectorfiled,sVectorfiledAll); | |
| 407 | + String sVectorfiledShow = session.getCurrentTool().getSVectorfiledShow(); | |
| 408 | + Map<String,Object> rMap = milvusService.getMilvusFiled(sVectorfiled,sVectorfiledAll,sVectorfiledShow); | |
| 373 | 409 | String sMilvusFiled = rMap.get("sMilvusFiled").toString(); |
| 374 | 410 | String sMilvusFiledDescription = rMap.get("sMilvusFiledDescription").toString(); |
| 375 | 411 | String sMilvusFiledDescriptionAll = rMap.get("sMilvusFiledDescriptionAll").toString(); |
| 376 | - List<String> fields = (List<String>) rMap.get("sFileds"); | |
| 377 | -// List<Map<String, String>> title = (List<Map<String, String>>) rMap.get("title"); | |
| 378 | - String milvusFilter = aiAgent.getMilvusFilter(session.getUserId(),userInput, sMilvusFiled, sMilvusFiledDescription); | |
| 379 | - List<Map<String,Object>> data = milvusService.getDataToCollection(sInputTabelName, milvusFilter,userInput,100,fields); | |
| 380 | - //采用表格形式显示 | |
| 381 | - resultExplain = aiAgent.explainMilvusResult(session.getUserId(),userInput,sMilvusFiledDescriptionAll,JSONObject.toJSONString(data)); | |
| 382 | - //buildMarkdownTableWithStream(data, title); | |
| 412 | + List<String> filedsShow = (List<String>) rMap.get("filedsShow"); | |
| 413 | + List<Map<String, String>> title = (List<Map<String, String>>) rMap.get("title"); | |
| 414 | + String milvusFilter = StrUtil.EMPTY; | |
| 415 | + if(!bCach){ | |
| 416 | + milvusFilter = aiAgent.getMilvusFilter(session.getUserId(),userInput, sMilvusFiled, sMilvusFiledDescription,DateUtil.now()); | |
| 417 | + log.info("查询向量库条件{}",milvusFilter); | |
| 418 | + milvusFilter = milvusService.isValidMilvusFilter(milvusFilter)?milvusFilter : null; | |
| 419 | + log.info("实际查询向量库条件{}",milvusFilter); | |
| 420 | + } | |
| 421 | + Integer pageSize = 100; | |
| 422 | + if(ObjectUtil.isEmpty(milvusFilter)){ | |
| 423 | + pageSize = 10; | |
| 424 | + } | |
| 425 | +// 待条件全查 不带 10条 | |
| 426 | + List<Map<String,Object>> data = milvusService.getDataToCollection(sInputTabelName, milvusFilter,userInput,pageSize,filedsShow); | |
| 427 | + //存储到历史问题库(带where条件了就不存)并且没有记录过缓存 | |
| 428 | + if(!bCach && ObjectUtil.isEmpty(milvusFilter)){ | |
| 429 | + //执行操作记录表 | |
| 430 | + try{ | |
| 431 | + List<ChatMessage> chatMessage = operableChatMemoryProvider.getCurrentChatMessages(session.getUserId()); | |
| 432 | + //插入向量库 | |
| 433 | + doAiUserAgentQuestion(session,userInput,milvusFilter,"MILVUS",chatMessage); | |
| 434 | + }catch (Exception e){ | |
| 435 | + log.error("插入向量库异常",e); | |
| 436 | + } | |
| 437 | + } | |
| 438 | + //采用表格形式显示明细、...详情、...记录、...列表、...清单 | |
| 439 | + if(ObjectUtil.isEmpty(milvusFilter) | |
| 440 | + || userInput.contains("明细") | |
| 441 | + || userInput.contains("详情") | |
| 442 | + || userInput.contains("记录") | |
| 443 | + || userInput.contains("列表") | |
| 444 | + || userInput.contains("清单") | |
| 445 | + ){ | |
| 446 | + resultExplain = buildMarkdownTableWithStream(data, title); | |
| 447 | + }else{ | |
| 448 | + resultExplain = aiAgent.explainMilvusResult(session.getUserId(),userInput,sMilvusFiledDescriptionAll,JSONObject.toJSONString(data)); | |
| 449 | + } | |
| 383 | 450 | return resultExplain; |
| 384 | 451 | }catch (Exception e){ |
| 385 | 452 | e.printStackTrace(); |
| ... | ... | @@ -435,25 +502,16 @@ public class XlyErpService { |
| 435 | 502 | * @return java.lang.String |
| 436 | 503 | * @Description 获取执行动态SQL |
| 437 | 504 | **/ |
| 438 | - private String getDynamicTableSql(UserSceneSession session,String input,String userId,String userInput,Integer attempt,String errorSql,String errorMessage,String iErroCount,String historySqlList,ErpAiAgent aiAgent){ | |
| 505 | + private String getDynamicTableSql(UserSceneSession session,String input,String userId,String userInput,Integer attempt,String errorSql,String errorMessage,String iErroCount,String historySqlList,ErpAiAgent aiAgent,String cleanSql){ | |
| 439 | 506 | String resultExplain = "信息模糊,请提供更具体的问题或指令"; |
| 440 | 507 | try{ |
| 441 | 508 | while (attempt < maxRetries) { |
| 442 | 509 | try{ |
| 443 | 510 | attempt = attempt+1; |
| 444 | 511 | if(attempt==1){ |
| 445 | - List<String> userQuestionList = session.getSUserQuestionList(); | |
| 446 | - if(ObjectUtil.isEmpty(userQuestionList)){ | |
| 447 | - userQuestionList = new ArrayList<>(); | |
| 448 | - } | |
| 449 | - String sQuestion = StrUtil.replace(userInput," ",StrUtil.EMPTY); | |
| 450 | - sQuestion = StrUtil.replace(sQuestion,"\t",StrUtil.EMPTY); | |
| 451 | - sQuestion = StrUtil.replace(sQuestion,"\n",StrUtil.EMPTY); | |
| 452 | - sQuestion = sQuestion.toLowerCase(); | |
| 453 | - userQuestionList.add(sQuestion); | |
| 454 | - session.setSUserQuestionList(userQuestionList); | |
| 512 | + addSessionUserQuestionList(session, userInput); | |
| 455 | 513 | } |
| 456 | - return getDynamicTableSqlExec(session, input, userId, userInput,errorSql,errorMessage,iErroCount,historySqlList, aiAgent); | |
| 514 | + return getDynamicTableSqlExec(session, input, userId, userInput,errorSql,errorMessage,iErroCount,historySqlList, aiAgent, cleanSql); | |
| 457 | 515 | }catch (SqlValidateException e){ |
| 458 | 516 | return "本场景没有识别到您的意图<br/> 如果切换场景,点[回首页],如果在本场景下,转换意图,点[清除记忆]"; |
| 459 | 517 | }catch (Exception e){ |
| ... | ... | @@ -472,7 +530,7 @@ public class XlyErpService { |
| 472 | 530 | if (attempt == maxRetries) { |
| 473 | 531 | return resultExplain +"<br/>查询的SQL语句:"+historySqlList; |
| 474 | 532 | } else { |
| 475 | - return getDynamicTableSql( session, input, userId, userInput, attempt,errorSqlOld,errorMessageOld,attempt.toString(),historySqlList, aiAgent); | |
| 533 | + return getDynamicTableSql( session, input, userId, userInput, attempt,errorSqlOld,errorMessageOld,attempt.toString(),historySqlList, aiAgent,cleanSql); | |
| 476 | 534 | } |
| 477 | 535 | } |
| 478 | 536 | } |
| ... | ... | @@ -484,6 +542,19 @@ public class XlyErpService { |
| 484 | 542 | return resultExplain; |
| 485 | 543 | } |
| 486 | 544 | |
| 545 | + private void addSessionUserQuestionList(UserSceneSession session,String userInput){ | |
| 546 | + List<String> userQuestionList = session.getSUserQuestionList(); | |
| 547 | + if(ObjectUtil.isEmpty(userQuestionList)){ | |
| 548 | + userQuestionList = new ArrayList<>(); | |
| 549 | + } | |
| 550 | + String sQuestion = StrUtil.replace(userInput," ",StrUtil.EMPTY); | |
| 551 | + sQuestion = StrUtil.replace(sQuestion,"\t",StrUtil.EMPTY); | |
| 552 | + sQuestion = StrUtil.replace(sQuestion,"\n",StrUtil.EMPTY); | |
| 553 | + sQuestion = sQuestion.toLowerCase(); | |
| 554 | + userQuestionList.add(sQuestion); | |
| 555 | + session.setSUserQuestionList(userQuestionList); | |
| 556 | + } | |
| 557 | + | |
| 487 | 558 | /*** |
| 488 | 559 | * @Author 钱豹 |
| 489 | 560 | * @Date 19:59 2026/3/4 |
| ... | ... | @@ -510,18 +581,15 @@ public class XlyErpService { |
| 510 | 581 | * @return java.lang.String |
| 511 | 582 | * @Description 执行动态sSql |
| 512 | 583 | **/ |
| 513 | - private String getDynamicTableSqlExec(UserSceneSession session,String input,String userId,String userInput,String errorSql,String errorMessage,String iErroCount,String historySqlList,ErpAiAgent aiAgent){ | |
| 584 | + private String getDynamicTableSqlExec(UserSceneSession session,String input,String userId,String userInput,String errorSql,String errorMessage,String iErroCount,String historySqlList,ErpAiAgent aiAgent,String cleanSql){ | |
| 514 | 585 | // 1. 构建自然语言转SQLAgent, |
| 515 | 586 | List<Map<String, Object>> sqlResult; |
| 516 | - String cleanSql = StrUtil.EMPTY; | |
| 517 | 587 | String rawSql; |
| 518 | 588 | String tableStruct = session.getCurrentTool().getSStructureMemo(); |
| 519 | 589 | String sError_mes; |
| 520 | 590 | Boolean doAddSql = false; |
| 521 | 591 | List<ChatMessage> chatMessage = new ArrayList<>(); |
| 522 | 592 | try{ |
| 523 | - //获取缓存动态SQL | |
| 524 | -// cleanSql = getDynamicTableNl2Sql(session,input); | |
| 525 | 593 | //如果之前已查询直接返回 |
| 526 | 594 | if(ObjectUtil.isEmpty(cleanSql)){ |
| 527 | 595 | DynamicTableNl2SqlAiAgent aiDynamicTableNl2SqlAiAgent = createDynamicTableNl2SqlAiAgent(userId, input, session); |
| ... | ... | @@ -575,7 +643,7 @@ public class XlyErpService { |
| 575 | 643 | //插入常用操作 |
| 576 | 644 | if(doAddSql){ |
| 577 | 645 | //执行操作记录表 |
| 578 | - doAiUserAgentQuestion(session,input,cleanSql,chatMessage); | |
| 646 | + doAiUserAgentQuestion(session,input,cleanSql,"MYSQL",chatMessage); | |
| 579 | 647 | } |
| 580 | 648 | String sText = aiAgent.explainSqlResult( |
| 581 | 649 | userId, |
| ... | ... | @@ -594,18 +662,41 @@ public class XlyErpService { |
| 594 | 662 | * @return java.lang.String |
| 595 | 663 | * @Description 获取动态SQL(历史中查询) |
| 596 | 664 | **/ |
| 665 | + private Map<String,Object> getDynamicTableCach(UserSceneSession session,String input){ | |
| 666 | + try{ | |
| 667 | + String searchText = session.getCurrentScene().getSId()+"_"+session.getCurrentTool().getSId()+input; | |
| 668 | + //根据问题查询向量库 | |
| 669 | + Map<String,Object> serMap = aiGlobalAgentQuestionSqlEmitterService.queryAiGlobalAgentQuestionSqlEmitter(searchText, "ai_global_agent_question_sql"); | |
| 670 | + return serMap; | |
| 671 | + }catch (Exception e){ | |
| 672 | + log.error("取是否走缓存异常"); | |
| 673 | + } | |
| 674 | + return null; | |
| 675 | + } | |
| 676 | + | |
| 677 | + /*** | |
| 678 | + * @Author 钱豹 | |
| 679 | + * @Date 17:04 2026/3/19 | |
| 680 | + * @Param [session] | |
| 681 | + * @return java.lang.String | |
| 682 | + * @Description 获取动态SQL(历史中查询) | |
| 683 | + **/ | |
| 597 | 684 | private String getDynamicTableNl2Sql(UserSceneSession session,String input){ |
| 598 | 685 | // String sReidKey = SqlValidateUtil.getsKey( session.getCurrentScene().getSId(), session.getCurrentTool().getSId(), input); |
| 599 | 686 | // Object sSql = redisService.get(sReidKey); |
| 600 | 687 | // if(ObjectUtil.isNotEmpty(sSql)){ |
| 601 | 688 | // return sSql.toString(); |
| 602 | 689 | // } |
| 603 | - String searchText = session.getCurrentScene().getSId()+"_"+session.getCurrentTool().getSId()+input; | |
| 604 | - //SqlValidateUtil.getsKey( session.getCurrentScene().getSId(), session.getCurrentTool().getSId(), SqlValidateUtil.getsQuestion(session.getSUserQuestionList())); | |
| 605 | - //根据问题查询向量库 | |
| 606 | - Map<String,Object> serMap = aiGlobalAgentQuestionSqlEmitterService.queryAiGlobalAgentQuestionSqlEmitter(searchText, "ai_global_agent_question_sql"); | |
| 607 | - if(ObjectUtil.isNotEmpty(serMap)){ | |
| 608 | - return serMap.get("sSqlContent").toString(); | |
| 690 | + try{ | |
| 691 | + String searchText = session.getCurrentScene().getSId()+"_"+session.getCurrentTool().getSId()+input; | |
| 692 | + //SqlValidateUtil.getsKey( session.getCurrentScene().getSId(), session.getCurrentTool().getSId(), SqlValidateUtil.getsQuestion(session.getSUserQuestionList())); | |
| 693 | + //根据问题查询向量库 | |
| 694 | + Map<String,Object> serMap = aiGlobalAgentQuestionSqlEmitterService.queryAiGlobalAgentQuestionSqlEmitter(searchText, "ai_global_agent_question_sql"); | |
| 695 | + if(ObjectUtil.isNotEmpty(serMap)){ | |
| 696 | + return serMap.get("sSqlContent").toString(); | |
| 697 | + } | |
| 698 | + }catch (Exception e){ | |
| 699 | + | |
| 609 | 700 | } |
| 610 | 701 | return null; |
| 611 | 702 | } |
| ... | ... | @@ -639,11 +730,12 @@ public class XlyErpService { |
| 639 | 730 | public void doAiUserAgentQuestion(UserSceneSession session, |
| 640 | 731 | String sQuestion, |
| 641 | 732 | String sSqlContent, |
| 733 | + String cachType, | |
| 642 | 734 | List<ChatMessage> chatMessage |
| 643 | 735 | |
| 644 | 736 | ){ |
| 645 | 737 | MultiThreadPoolServer mts = MultiThreadPoolServer.getInstance(); |
| 646 | - AiUserAgentQuestionThread at = new AiUserAgentQuestionThread(session,sQuestion,sSqlContent,chatMessage); | |
| 738 | + AiUserAgentQuestionThread at = new AiUserAgentQuestionThread(session,sQuestion,sSqlContent,cachType,chatMessage); | |
| 647 | 739 | mts.service(at); |
| 648 | 740 | } |
| 649 | 741 | ... | ... |
src/main/java/com/xly/thread/AiUserAgentQuestionThread.java
| ... | ... | @@ -21,14 +21,16 @@ public class AiUserAgentQuestionThread implements Runnable { |
| 21 | 21 | private UserSceneSession session; |
| 22 | 22 | private String sSqlContent; |
| 23 | 23 | private String sQuestion; |
| 24 | + private String cachType; | |
| 24 | 25 | List<ChatMessage> userMessage; |
| 25 | 26 | public final String sProName="Sp_Ai_AiUserAgentQuestionThread"; |
| 26 | 27 | |
| 27 | - public AiUserAgentQuestionThread(UserSceneSession session,String sQuestion,String sSqlContent, List<ChatMessage> userMessage ) { | |
| 28 | + public AiUserAgentQuestionThread(UserSceneSession session,String sQuestion,String sSqlContent,String cachType, List<ChatMessage> userMessage ) { | |
| 28 | 29 | this.session = session; |
| 29 | 30 | this.sSqlContent = sSqlContent; |
| 30 | 31 | this.sQuestion = sQuestion; |
| 31 | 32 | this.userMessage = userMessage; |
| 33 | + this.cachType = cachType; | |
| 32 | 34 | } |
| 33 | 35 | |
| 34 | 36 | @Override |
| ... | ... | @@ -44,6 +46,7 @@ public class AiUserAgentQuestionThread implements Runnable { |
| 44 | 46 | Map<String, Object> data = getMap(sSceneId, sMethodId,bRedis,sQuestionGroupNo); |
| 45 | 47 | data.put("sQuestion",SqlValidateUtil.getsQuestion(session.getSUserQuestionList())); |
| 46 | 48 | data.put("sId",new UUIDGenerator().next()); |
| 49 | + data.put("cachType",cachType); | |
| 47 | 50 | //插入Redis缓存 |
| 48 | 51 | if(bRedis==1 && ObjectUtil.isNotEmpty(sSqlContent)){ |
| 49 | 52 | String sReidKey = SqlValidateUtil.getsKey( sSceneId, sMethodId, sQuestionGroupNo); |
| ... | ... | @@ -52,7 +55,7 @@ public class AiUserAgentQuestionThread implements Runnable { |
| 52 | 55 | String sKey = sSceneId+"_"+sMethodId +"_"+sQuestion; |
| 53 | 56 | // SqlValidateUtil.getsKey( sSceneId, sMethodId, SqlValidateUtil.getsQuestion(session.getSUserQuestionList())); |
| 54 | 57 | //存入向量库 |
| 55 | - aiGlobalAgentQuestionSqlEmitterService.addAiGlobalAgentQuestionSqlEmitter(sKey,data,sQuestion,sSqlContent,"ai_global_agent_question_sql"); | |
| 58 | + aiGlobalAgentQuestionSqlEmitterService.addAiGlobalAgentQuestionSqlEmitter(sKey,data,sQuestion,sSqlContent,cachType,"ai_global_agent_question_sql"); | |
| 56 | 59 | //调用数据库插入数据库 |
| 57 | 60 | Map<String, Object> searMap = dynamicExeDbService.getDoProMap(sProName, data); |
| 58 | 61 | dynamicExeDbService.getCallPro(searMap, sProName); | ... | ... |
src/main/java/com/xly/tool/DynamicToolProvider.java
| ... | ... | @@ -903,17 +903,18 @@ public class DynamicToolProvider implements ToolProvider { |
| 903 | 903 | Map<String,Object> returnMap = transformationArgs( args, paramDefs); |
| 904 | 904 | return paramDefs.stream() |
| 905 | 905 | .filter(pd -> Boolean.TRUE.equals(pd.getBEmpty()) && pd.getBTipModel()) |
| 906 | - .filter(pd -> | |
| 907 | - (!returnMap.containsKey(pd.getSParam()) | |
| 908 | - || (ObjectUtil.isEmpty(returnMap.get(pd.getSParam()))) | |
| 909 | - || (pd.getSParamValue().startsWith("d") && 0==Double.valueOf (returnMap.get(pd.getSParam()).toString())) | |
| 910 | - ) | |
| 911 | - && (!returnMap.containsKey(pd.getSParamValue()) || (ObjectUtil.isEmpty(returnMap.get(pd.getSParamValue())))) | |
| 912 | - ) | |
| 906 | + .filter(pd -> checkMiss( returnMap, pd)) | |
| 913 | 907 | .map(ParamRule::getSParam) |
| 914 | 908 | .toList(); |
| 915 | 909 | } |
| 916 | 910 | |
| 911 | + private Boolean checkMiss(Map<String,Object> returnMap,ParamRule pd) { // true when pd should be reported as a missing parameter | |
| 912 | + Boolean bBhcs = (ObjectUtil.isEmpty(returnMap.get(pd.getSParam()))); // ObjectUtil.isEmpty flags the value stored under the parameter key | |
| 913 | + Boolean bDbZero = !bBhcs && (pd.getSParamValue().startsWith("d") && 0 == Double.valueOf(returnMap.get(pd.getSParam()).toString())); // test bBhcs first: get(...) can be null here and .toString() on it would throw | |
| 914 | + return bDbZero || bBhcs || (!returnMap.containsKey(pd.getSParamValue()) || (ObjectUtil.isEmpty(returnMap.get(pd.getSParamValue())))); | |
| 915 | + } | |
| 916 | + | |
| 917 | + | |
| 917 | 918 | /** |
| 918 | 919 | * 确认后必填参数 |
| 919 | 920 | */ | ... | ... |
src/main/java/com/xly/tts/bean/TTSResponseDTO.java
| ... | ... | @@ -72,6 +72,15 @@ public class TTSResponseDTO implements Serializable { |
| 72 | 72 | private String sReturnType = ReturnTypeCode.MAKEDOWN.getCode(); |
| 73 | 73 | |
| 74 | 74 | private Boolean ErpComplete; |
| 75 | + /** | |
| 76 | + * 数据库类型 X: 向量库 S:数据库 | |
| 77 | + */ | |
| 78 | + private String dbType; | |
| 79 | + | |
| 80 | + /** | |
| 81 | + * 缓存类型 H: 缓存 D: 动态 | |
| 82 | + */ | |
| 83 | + private String dbCach; | |
| 75 | 84 | |
| 76 | 85 | /** |
| 77 | 86 | * 创建失败响应 | ... | ... |
src/main/java/com/xly/tts/service/PythonTtsProxyService.java
| ... | ... | @@ -278,6 +278,8 @@ public class PythonTtsProxyService { |
| 278 | 278 | .sSceneName(aiResponseDTO.getSSceneName()) |
| 279 | 279 | .sMethodName(aiResponseDTO.getSMethodName()) |
| 280 | 280 | .sReturnType(aiResponseDTO.getSReturnType()) |
| 281 | + .dbType(aiResponseDTO.getDbType()) | |
| 282 | + .dbCach(aiResponseDTO.getDbCach()) | |
| 281 | 283 | .sCommonts(BusinessCode.COMMONTS.getMessage()) |
| 282 | 284 | .timestamp(System.currentTimeMillis()) |
| 283 | 285 | .textLength((aiText + systemText).length()) | ... | ... |
src/main/resources/application.yml
| ... | ... | @@ -127,11 +127,12 @@ langchain4j: |
| 127 | 127 | # 聊天模型配置(用于一般对话) |
| 128 | 128 | base-url: http://112.82.245.194:11434 |
| 129 | 129 | chat-model-name: qwen2.5:7b-instruct |
| 130 | +# chat-model-name: qwen3:14b | |
| 130 | 131 | # chat-model-name: qwen3.5:9b |
| 131 | 132 | # SQL/代码模型配置(专门用于代码和SQL生成) |
| 132 | 133 | sql-model-name: qwen2.5-coder:7b |
| 134 | +# sql-model-name: SimonPu/qwen3-coder:30B-Instruct_Q4_K_XL | |
| 133 | 135 | # sql-model-name: qwen2.5-coder:32b |
| 134 | -# sql-model-name: mdq100/qwen3.5-coder:35b | |
| 135 | 136 | # 或者如果两个模型在同一服务器,可以使用同一个URL |
| 136 | 137 | |
| 137 | 138 | # ollama: |
| ... | ... | @@ -140,7 +141,9 @@ langchain4j: |
| 140 | 141 | # chat-model-name: qwen3.5:9b |
| 141 | 142 | # # SQL/代码模型配置(专门用于代码和SQL生成) |
| 142 | 143 | # sql-model-name: mdq100/qwen3.5-coder:35b |
| 143 | - | |
| 144 | +tesseract: | |
| 145 | + # 这里填写你存放 tessdata 的目录路径,注意不是 tessdata 文件夹本身 | |
| 146 | + datapath: D:/xlyweberp/Tesseract-OCR/tessdata | |
| 144 | 147 | mybatis: |
| 145 | 148 | mapper-locations: classpath:mapper/*.xml |
| 146 | 149 | type-aliases-package: com.xly.entity | ... | ... |