diff --git a/pom.xml b/pom.xml index b75e830..e693acd 100644 --- a/pom.xml +++ b/pom.xml @@ -106,13 +106,41 @@ spring-boot-starter-webflux + net.sourceforge.tess4j tess4j ${tess4j.version} - compile + + + io.github.mymonstercat + rapidocr + 0.0.7 + + + + io.github.mymonstercat + rapidocr-onnx-platform + 0.0.7 + + + + + + + + + + + + commons-io + commons-io + 2.15.1 + + + org.springframework.boot spring-boot-starter-thymeleaf @@ -335,12 +363,7 @@ ${langchain4j.version} - - - com.microsoft.onnxruntime - onnxruntime - 1.17.0 - + diff --git a/src/main/java/com/xly/agent/ErpAiAgent.java b/src/main/java/com/xly/agent/ErpAiAgent.java index e120d7d..ba4f7d8 100644 --- a/src/main/java/com/xly/agent/ErpAiAgent.java +++ b/src/main/java/com/xly/agent/ErpAiAgent.java @@ -11,14 +11,14 @@ import dev.langchain4j.service.V; */ public interface ErpAiAgent { @SystemMessage(""" - 1. 方法匹配:先精准拆解用户查询的核心业务意图,再自动匹配唯一符合用户问题的工具方法(MethodNo),禁止自创,规则如下; - 1.1 匹配方法时,无需考虑工具描述(@TOOL)中 1.必填参数,2.选填参数,示例,parameters内容 四个部分的内容; - 1.2 匹配方法时,只关注工具描述(@TOOL)中 “当用户” 和 “时,必须调用本工具”两个短语之间的内容; - 1.3 调用工具前,不需要询问用户提供缺失的参数 - 2. 参数提取:提取该工具的全部参数,与描述完全一致,严格按标注类型赋值,规则如下: - 2.1 数字无引号,为空时禁止赋值0; - 2.2 如果有空格需要去掉空格后再提取。 - """) + 1. 方法匹配:先精准拆解用户查询的核心业务意图,再自动匹配唯一符合用户问题的工具方法(MethodNo),禁止自创,规则如下; + 1.1 匹配方法时,无需考虑工具描述(@TOOL)中 1.必填参数,2.选填参数,示例,parameters内容 四个部分的内容; + 1.2 匹配方法时,只关注工具描述(@TOOL)中 “当用户” 和 “时,必须调用本工具”两个短语之间的内容; + 1.3 调用工具前,不需要询问用户提供缺失的参数 + 2. 参数提取:提取该工具的全部参数,与描述完全一致,严格按标注类型赋值,规则如下: + 2.1 数字无引号,为空时禁止赋值0; + 2.2 如果有空格需要去掉空格后再提取。 + """) @UserMessage("用户输入:{{userInput}}") String chat(@MemoryId String userId, @V("userInput") String userInput); @@ -27,16 +27,16 @@ public interface ErpAiAgent { * 入参:用户问题、执行的SQL、表结构、JSON格式结果 */ @SystemMessage(""" - 你是专业的业务数据分析师,严格遵循以下**通用规则**解释查询结果,适用于所有业务场景: - 1. 解释风格:贴合业务场景,无任何SQL专业术语,用口语化、简洁的商业语言说明,避免技术词汇; - 2. 数据准确:严格按照JSON执行结果解释,不夸大、不遗漏、不编造数据,数值与结果完全一致; - 3. 输出格式:仅返回解释内容,不要列出ID,无多余标题、换行、符号,结果为空时直接返回“未查询到相关数据” - 3.1. 所有数字格式必须以纯文本形式输出,严禁使用千分位分隔符(即不要出现逗号 ",")示例:正确写法是 1000000,错误写法是 1,000,000,即使数字很大,也请保持连续的数字串,不要打断。 - 3.2 所有日期请转换为 YYYY-MM-DD 格式(例如:2026-03-15),严禁包含时间部分(如小时、分钟、秒)(例如:2026-03-15 00:00:00),也不要包含时区信息。” - 3.3. 金额,单价,数量 严禁使用千分位分隔符(即不要出现逗号 ",")示例:正确写法是 2400056,错误写法是 2,400,056 即使数字很大,也请保持连续的数字串,不要打断。 - 4. 长度控制:单条解释不超过150字,条理清晰,重点突出核心数据/趋势; - 5. 禁止重复:不重复用户问题、不重复执行的SQL语句,仅针对结果做业务解读。 - """) + 你是专业的业务数据分析师,严格遵循以下**通用规则**解释查询结果,适用于所有业务场景: + 1. 解释风格:贴合业务场景,无任何SQL专业术语,用口语化、简洁的商业语言说明,避免技术词汇; + 2. 数据准确:严格按照JSON执行结果解释,不夸大、不遗漏、不编造数据,数值与结果完全一致; + 3. 输出格式:仅返回解释内容,不要列出ID,无多余标题、换行、符号,结果为空时直接返回“未查询到相关数据” + 3.1. 所有数字格式必须以纯文本形式输出,严禁使用千分位分隔符(即不要出现逗号 ",")示例:正确写法是 1000000,错误写法是 1,000,000,即使数字很大,也请保持连续的数字串,不要打断。 + 3.2 所有日期请转换为 YYYY-MM-DD 格式(例如:2026-03-15),严禁包含时间部分(如小时、分钟、秒)(例如:2026-03-15 00:00:00),也不要包含时区信息。” + 3.3. 金额,单价,数量 严禁使用千分位分隔符(即不要出现逗号 ",")示例:正确写法是 2400056,错误写法是 2,400,056 即使数字很大,也请保持连续的数字串,不要打断。 + 4. 长度控制:单条解释不超过150字,条理清晰,重点突出核心数据/趋势; + 5. 禁止重复:不重复用户问题、不重复执行的SQL语句,仅针对结果做业务解读。 + """) @UserMessage(""" 【业务场景表结构信息】 表结构详情:{{tableStruct}} @@ -57,119 +57,143 @@ public interface ErpAiAgent { * 入参:用户问题、执行的SQL、表结构、JSON格式结果 */ @SystemMessage(""" - 你是专业的业务数据分析师,请分析以下查询结果: - 【用户问题】 - {{userInput}} - 【数据字段说明】 - {{sMilvusFiledDescription}} - 【查询结果数据(JSON格式)】 - {{result}} - 【分析要求】 - 1. 解释风格:贴合业务场景,无任何SQL专业术语,用口语化、简洁的商业语言说明,避免技术词汇; - 2. 数据准确:严格按照JSON执行结果解释,不夸大、不遗漏、不编造数据,数值与结果完全一致; - 3. 输出格式: - 3.1. 如果用户要求"表格形式展示",先输出简短文字说明,然后输出Markdown格式的表格 - 3.2. 如果用户未要求表格,仅返回解释内容,不要列出ID,无多余标题、换行、符号 - 3.3. 结果为空时直接返回"未查询到相关数据" - 3.4. 所有数字格式必须以纯文本形式输出,严禁使用千分位分隔符(即不要出现逗号 ",") - 3.5. 所有日期请转换为 YYYY-MM-DD 格式,严禁包含时间部分 - 4. 长度控制:单条解释不超过150字,条理清晰,重点突出核心数据/趋势; - 5. 禁止重复:不重复用户问题、不重复执行的SQL语句,仅针对结果做业务解读。 - """) + 你是专业的业务数据分析师,请分析以下查询结果: + 【用户问题】 + {{userInput}} + 【数据字段说明】 + {{sMilvusFiledDescription}} + 【查询结果数据(JSON格式)】 + {{result}} + 【分析要求】 + 1. 解释风格:贴合业务场景,无任何SQL专业术语,用口语化、简洁的商业语言说明,避免技术词汇; + 2. 数据准确:严格按照JSON执行结果解释,不夸大、不遗漏、不编造数据,数值与结果完全一致; + 3. 输出格式: + 3.1. 如果用户要求"表格形式展示",先输出简短文字说明,然后输出Markdown格式的表格 + 3.2. 如果用户未要求表格,仅返回解释内容,不要列出ID,无多余标题、换行、符号 + 3.3. 结果为空时直接返回"未查询到相关数据" + 3.4. 所有数字格式必须以纯文本形式输出,严禁使用千分位分隔符(即不要出现逗号 ",") + 3.5. 所有日期请转换为 YYYY-MM-DD 格式,严禁包含时间部分 + 4. 长度控制:单条解释不超过150字,条理清晰,重点突出核心数据/趋势; + 5. 禁止重复:不重复用户问题、不重复执行的SQL语句,仅针对结果做业务解读。 + """) @UserMessage(""" - 【用户查询】 - {{userInput}} - 【字段说明】 - {{sMilvusFiledDescription}} - 【查询结果】 - 用户原始查询:{{userInput}} - 执行查询向量库后结果(JSON格式):{{result}} - 请根据上述信息+通用规则,对查询结果做业务解释: - """) + 【用户查询】 + {{userInput}} + 【字段说明】 + {{sMilvusFiledDescription}} + 【查询结果】 + 用户原始查询:{{userInput}} + 执行查询向量库后结果(JSON格式):{{result}} + 请根据上述信息+通用规则,对查询结果做业务解释: + """) String explainMilvusResult(@MemoryId String userId, @V("userInput") String userInput, @V("sMilvusFiledDescription") String sMilvusFiledDescription, @V("result") String result); - /** - * AI路由判断接口 - * true: 走聚合查询(MySQL) - * false: 走向量检索(Milvus) - */ -// @SystemMessage(""" -// 你是一个智能查询路由专家,请根据用户需求判断应该使用哪种查询方式。 -// -// 判断标准: -// 1. 返回 true(聚合查询/MySQL)的场景: -// - 需要计算统计指标:总数、总和、平均值、最大/最小值、占比 -// - 需要数据汇总:分组统计、排行榜、TopN -// - 包含关键词:统计、求和、汇总、排名、平均、数量、总额、最高、最低、占比、分组、分析、趋势 -// - 示例:统计本月销售总额、查询销量前10的商品、各品类占比分析 -// -// 2. 返回 false(向量检索/Milvus)的场景: -// - 查询明细数据:XXX的销售订单明细、XXX的客户信息、具体内容详情 -// - 查找相似内容:根据语义查找相关文档、推荐相似商品 -// - 模糊匹配:不确定具体关键词,需要语义理解 -// - 内容检索:查找包含特定概念的文档 -// - 包含关键词:明细、详情、查询明细、查找、搜索、匹配、推荐、相似、相关、类似 -// - 示例:李留记的销售订单明细、查询关于人工智能的文档、找相似的图片 -// -// 重要规则: -// - 只返回 true 或 false,不要返回其他内容 -// - 不要解释,不要添加额外文字 -// - 如果用户要求"表格形式展示",返回 false(明细查询) -// - 如果用户指定具体人名、具体对象,返回 false(明细查询) -// """) -// @UserMessage("用户需求:{{userInput}}") @SystemMessage(""" - 你是一个智能查询路由专家。请根据【用户需求】,只返回 true 或 false - - 如果用户需求包含以下关键词:统计、求和、汇总、排名、TopN、平均、数量、总额、最高、最低、占比、分组,则返回true - - 如果用户需求属于模糊匹配、普通语义检索,查询明细,(例如:查询报价单明细,查询客户信息),则返回false - - 查询明细数据:XXX的销售订单明细、XXX的客户信息、具体内容详情,则返回false - - 模糊匹配:不确定具体关键词,需要语义理解,则返回false + 你是一个智能查询路由专家。请根据【用户需求】,只返回 true 或 false。 + + 【最高优先级规则 - 必须首先判断】 + 如果用户需求包含以下任一关键词,**直接返回 false**,不再进行其他判断: + - 明细、详情、详细信息、详细内容、具体内容 + - 查询...明细、...详情、...记录、...列表、...清单 + + 重要:只要出现以上关键词,说明用户需要的是明细数据查询,而非统计分析。 + + 【统计类关键词 - 仅在满足最高优先级规则后才判断】 + 只有当用户需求不包含上述明细类关键词时,才检查是否包含以下关键词: + 统计、求和、汇总、排名、TopN、平均、数量、总额、最高、最低、占比、分组 + + - 如果包含,返回 true + - 否则返回 false + + 【判断示例】 + - \"查询中科精工集团的彩盒类产品的报价单明细\" → false(包含\"明细\") + - \"统计各产品销售额\" → true(包含\"统计\",且无明细关键词) + - \"查询客户张三信息\" → false(无统计关键词,无明细关键词) + - \"销售额排名前10的产品\" → true(包含\"排名\",且无明细关键词) + - \"查看销售订单明细\" → false(包含\"明细\") """) @UserMessage(""" - 【用户需求】 + 【用户需求】 {{userInput}} """) Boolean routeQuery(@MemoryId String userId, @V("userInput") String userInput); /** - * 生成 Milvus 过滤条件 + * 生成 Milvus 过滤条件(适配 Milvus v2.3.9) */ @SystemMessage(""" - MILVUS 标量过滤条件生成规则(严格遵守): - 1. 语法规范: - - 允许的操作符:==, !=, like - - 逻辑组合:&& (AND), || (OR) - - 所有字段都是字符串类型,值必须使用单引号包裹 - - 字符串中的单引号需要转义:'O''Reilly' - 2. 可用字段(只能使用这些字段): - - {{sMilvusFiled}} - 字段说明: - - {{sMilvusFiledDescription}} - 3. 重要规则: - - 只使用上述可用字段,不要创建新字段 - - 如果用户提到了文档类型(如"报价单"、"订单"等),但可用字段中没有类型字段,则忽略该条件 - - 只提取有明确值的字段条件 - 4. 生成规则: - - 如果没有提取到任何具体条件,返回空字符串 - - 从用户输入中提取明确的字段条件 - - 识别模式:字段名 + 操作符 + 值 - - 示例: - * "单据号 INV001" → sBillNo == 'INV001' - * "客户编号 C001" → sCustomerNo == 'C001' - * "销售人员张三" → sSalesManName == '张三' - * "产品包含手机" → sProductStyle like '%手机%' - 5. 输出格式: - - 仅返回纯过滤条件,无任何解释、换行、备注 - - 单条件:sBillNo == 'INV001' - - 多条件:(sBillNo == 'INV001' && sCustomerNo == 'C001') - - 无条件:直接返回空字符串 - """) - @UserMessage(""" + MILVUS 标量过滤条件生成规则(严格遵守 - 当前版本 v2.3.9): + + 【重要输出约束】 + - 必须返回有效的 Milvus 过滤条件表达式 + - 禁止返回 true 或 false + - 禁止返回空字符串以外的任何非表达式内容 + - 无条件时只返回空字符串 "" + + 1. 语法规范: + - 允许的操作符:==, !=, like + - 逻辑组合:&& (AND), || (OR) + - 所有字段都是字符串类型,值必须使用单引号包裹 + - 字符串中的单引号需要转义:'O''Reilly' + + 2. 【重要】Milvus v2.3.9 like 操作符限制: + - ✅ 支持:like '关键字%'(前缀匹配,以关键字开头) + - ❌ 不支持:like '%关键字%'(包含匹配) + - ❌ 不支持:like '%关键字'(后缀匹配) + + 3. 可用字段(只能使用这些字段): + - {{sMilvusFiled}} + 字段说明: + - {{sMilvusFiledDescription}} + + 4. 提取规则: + - 只使用上述可用字段,不要创建新字段 + - 如果用户提到了文档类型(如"报价单"、"订单"等),但可用字段中没有类型字段,则忽略该条件 + + 【精确匹配规则】: + - 当用户提供明确值时:字段 == '值' + * 例如:"客户名称中科精工" → sCustomerName == '中科精工' + * 例如:"单据号 INV001" → sBillNo == 'INV001' + + 5. 时间处理规则: + - 当前系统时间:{{sDataNow}}(格式:yyyy-MM-dd) + - 相对时间转换规则: + * "今天/今日" → 当天 00:00:00 到 23:59:59 + * "昨天" → 前一天 00:00:00 到 23:59:59 + * "本周" → 本周一 00:00:00 到本周日 23:59:59 + * "本月" → 本月1日 00:00:00 到本月最后一天 23:59:59 + * "本年" → 本年1月1日 00:00:00 到本年12月31日 23:59:59 + * "近X天" → 从 X 天前 00:00:00 到今天 23:59:59 + - 日期转时间戳:所有日期转换为 Unix 时间戳(秒) + - 时间范围格式:字段 >= 起始时间戳 && 字段 <= 结束时间戳 + - 如果没有明确的时间需求,不要添加任何时间过滤条件 + + 6. 示例: + ✅ 正确输出: + - "客户名称中科精工" → sCustomerName == '中科精工' + - "中科精工的报价单明细" → sCustomerName == '中科精工' + - "产品以彩盒开头" → sProductStyle like '彩盒%' + - "无条件" → "" + + ❌ 错误输出(禁止): + - "中科精工的报价单明细" → true + - "中科精工的报价单明细" → false + - "中科精工的报价单明细" → 1 + + 7. 输出格式: + - 仅返回纯过滤条件,无任何解释、换行、备注 + - 单条件:sCustomerName == '中科精工' + - 多条件:(sCustomerName == '中科精工' && sProductStyle like '彩盒%') + - 无条件:直接返回空字符串 "" + """) + @UserMessage(""" 【用户查询】 - {{userInput}} + 【当前时间】 + - {{sDataNow}} 【可用字段】 - {{sMilvusFiled}} 【字段说明】 @@ -178,5 +202,7 @@ public interface ErpAiAgent { String getMilvusFilter(@MemoryId String userId, @V("userInput") String userInput, @V("sMilvusFiled") String sMilvusFiled, - @V("sMilvusFiledDescription") String sMilvusFiledDescription); + @V("sMilvusFiledDescription") String sMilvusFiledDescription, + @V("sDataNow") String sDataNow); + } diff --git a/src/main/java/com/xly/entity/AiResponseDTO.java b/src/main/java/com/xly/entity/AiResponseDTO.java index bc5fa66..72f84e7 100644 --- a/src/main/java/com/xly/entity/AiResponseDTO.java +++ b/src/main/java/com/xly/entity/AiResponseDTO.java @@ -139,6 +139,17 @@ public class AiResponseDTO implements Serializable { */ private String errorCode; + /** + * 数据库类型 X: 向量库 S:数据库 + */ + private String dbType; + + /** + * 数据库类型 H: 缓存 D: 动态 + */ + private String dbCach; + + // ============ 便捷方法 ============ /** diff --git a/src/main/java/com/xly/entity/ToolMeta.java b/src/main/java/com/xly/entity/ToolMeta.java index b2874c4..b2e4542 100644 --- a/src/main/java/com/xly/entity/ToolMeta.java +++ b/src/main/java/com/xly/entity/ToolMeta.java @@ -50,5 +50,6 @@ public class ToolMeta { private String sVectorfiled; private String sVectorjson; private String sVectorfiledAll; + private String sVectorfiledShow; } diff --git a/src/main/java/com/xly/entity/UserSceneSession.java b/src/main/java/com/xly/entity/UserSceneSession.java index 8de87f9..76bf994 100644 --- a/src/main/java/com/xly/entity/UserSceneSession.java +++ b/src/main/java/com/xly/entity/UserSceneSession.java @@ -67,6 +67,16 @@ public class UserSceneSession { private Map args; /** + * 数据库类型 X: 向量库 S:数据库 + */ + private String dbType; + + /** + * 数据库类型 H: 缓存 D: 动态 + */ + private String dbCach; + + /** * 构建场景选择提示语:展示权限内场景,引导用户选择 * @return 自然语言提示语 */ diff --git a/src/main/java/com/xly/milvus/service/AiGlobalAgentQuestionSqlEmitterService.java b/src/main/java/com/xly/milvus/service/AiGlobalAgentQuestionSqlEmitterService.java index c98c0d7..c560544 100644 --- a/src/main/java/com/xly/milvus/service/AiGlobalAgentQuestionSqlEmitterService.java +++ b/src/main/java/com/xly/milvus/service/AiGlobalAgentQuestionSqlEmitterService.java @@ -12,7 +12,7 @@ public interface AiGlobalAgentQuestionSqlEmitterService { * @return void * @Description 插入向量库 **/ - void addAiGlobalAgentQuestionSqlEmitter(String sKey,Map data, String sQuestion, String sSqlContent, String collectionName); + void addAiGlobalAgentQuestionSqlEmitter(String sKey,Map data,String sQuestion,String sSqlContent,String cachType,String collectionName); Map queryAiGlobalAgentQuestionSqlEmitter(String searchText, String collectionName); diff --git a/src/main/java/com/xly/milvus/service/MilvusService.java b/src/main/java/com/xly/milvus/service/MilvusService.java index 178052e..7960561 100644 --- a/src/main/java/com/xly/milvus/service/MilvusService.java +++ b/src/main/java/com/xly/milvus/service/MilvusService.java @@ -54,5 +54,7 @@ public interface MilvusService { * @return java.util.Map * @Description 获取配置 **/ - Map getMilvusFiled(String sVectorfiled,String sVectorfiledAll); + Map getMilvusFiled(String sVectorfiled,String sVectorfiledAll,String sVectorfiledShow); + + boolean isValidMilvusFilter(String milvusFilter); } \ No newline at end of file diff --git a/src/main/java/com/xly/milvus/service/impl/AiGlobalAgentQuestionSqlEmitterServiceImpl.java b/src/main/java/com/xly/milvus/service/impl/AiGlobalAgentQuestionSqlEmitterServiceImpl.java index bacaa16..d178dcf 100644 --- a/src/main/java/com/xly/milvus/service/impl/AiGlobalAgentQuestionSqlEmitterServiceImpl.java +++ b/src/main/java/com/xly/milvus/service/impl/AiGlobalAgentQuestionSqlEmitterServiceImpl.java @@ -2,6 +2,7 @@ package com.xly.milvus.service.impl; import cn.hutool.core.collection.ConcurrentHashSet; import cn.hutool.core.util.ObjectUtil; +import cn.hutool.core.util.StrUtil; import com.google.gson.JsonArray; import com.google.gson.JsonObject; import com.xly.milvus.service.AiGlobalAgentQuestionSqlEmitterService; @@ -52,16 +53,19 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent * @Description 插入数据 **/ @Override - public void addAiGlobalAgentQuestionSqlEmitter(String sKey,Map data,String sQuestion,String sSqlContent,String collectionName) { + public void addAiGlobalAgentQuestionSqlEmitter(String sKey,Map data,String sQuestion,String sSqlContent,String cachType,String collectionName) { // 向量化 List vector = vectorizationService.textToVector(sKey); if (vector == null || vector.isEmpty()) { throw new RuntimeException("向量化失败"); } + if(ObjectUtil.isEmpty(sSqlContent)){ + sSqlContent = StrUtil.EMPTY; + } // 2. 转换为Milvus格式 - JsonObject row = convertToMilvusRow(data, vector,sQuestion,sSqlContent,sKey); + JsonObject row = convertToMilvusRow(data, vector,sQuestion,sSqlContent,cachType,sKey); //创建集合 // createCollection(collectionName); @@ -86,7 +90,7 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent Map searchParams = new HashMap<>(); searchParams.put("nprobe", 10); // 对于 IP 度量,相似度范围在 [minScore, maxScore] - searchParams.put("radius", 0.9); // 最小相似度 + searchParams.put("radius", 0.98); // 最小相似度 searchParams.put("range_filter", 1); // 最大相似度 // 1. 确保集合已加载 ensureCollectionLoaded(collectionName); @@ -108,7 +112,6 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent // .limit(100) // .build(); // QueryResp queryResp = milvusClient.query(queryReq); - // 3. 创建 Milvus FloatVec 对象 FloatVec floatVec = new FloatVec(floatArray); // 4. 构建搜索请求 @@ -118,7 +121,7 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent .annsField("vector") // 向量字段名 .topK(10) // 返回最相似的10条 .metricType(IndexParam.MetricType.IP) // 内积相似度 - .outputFields(Arrays.asList("sQuestion", "sSqlContent", "data_id", "create_time","metadata")) + .outputFields(Arrays.asList("sQuestion", "sSqlContent", "data_id","db_name", "create_time","metadata")) .searchParams(searchParams) .build(); // 5. 执行搜索 @@ -230,7 +233,7 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent /** * 从实体对象构建Milvus插入数据 */ - public JsonObject convertToMilvusRow(Map data, List vector,String sQuestion,String sSqlContent,String sKey) { + public JsonObject convertToMilvusRow(Map data, List vector,String sQuestion,String sSqlContent,String cachType,String sKey) { JsonObject row = new JsonObject(); // 添加向量 @@ -242,6 +245,7 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent row.addProperty("data_id", data.get("sId").toString()); row.addProperty("sQuestion", sQuestion); row.addProperty("sSqlContent", sSqlContent); + row.addProperty("cachType", cachType); // 创建时间字段 - 必须提供! row.addProperty("create_time", System.currentTimeMillis()); // 创建时间字段 - 必须提供! @@ -305,23 +309,33 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent CreateCollectionReq.FieldSchema.builder() .name("sQuestion") .dataType(DataType.VarChar) - .maxLength(1000) + .maxLength(5000) .description("用户问题") .build(), - // 4. SQL内容字段 + // 4. SQL内容字段 - 设置为可空 CreateCollectionReq.FieldSchema.builder() .name("sSqlContent") .dataType(DataType.VarChar) - .maxLength(5000) // SQL可能较长 + .maxLength(50000) // SQL可能较长 + .isPrimaryKey(false) + .isNullable(true) // 设置为 true,允许为空 .description("SQL语句") .build(), + // 4. 缓存类型 + CreateCollectionReq.FieldSchema.builder() + .name("cachType") + .dataType(DataType.VarChar) + .maxLength(100) // 缓存类型 + .description("缓存类型") + .build(), + // 5. 数据ID字段 CreateCollectionReq.FieldSchema.builder() .name("data_id") .dataType(DataType.VarChar) - .maxLength(100) + .maxLength(500) // 增加最大长度 .description("原始数据ID") .build(), @@ -341,7 +355,7 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent CreateCollectionReq.FieldSchema.builder() .name("sKey") .dataType(DataType.VarChar) - .maxLength(100) + .maxLength(1000) // 增加最大长度 .description("存入的vector转换前数据") .build() ); @@ -667,7 +681,7 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent IndexParam questionIndex = IndexParam.builder() .fieldName("sQuestion") .indexName("idx_question") - .indexType(IndexParam.IndexType.INVERTED) // 倒排索引 + .indexType(IndexParam.IndexType.TRIE) // 倒排索引 .build(); allIndexParams.add(questionIndex); @@ -675,14 +689,21 @@ public class AiGlobalAgentQuestionSqlEmitterServiceImpl implements AiGlobalAgent IndexParam idIndex = IndexParam.builder() .fieldName("data_id") .indexName("idx_data_id") - .indexType(IndexParam.IndexType.INVERTED) + .indexType(IndexParam.IndexType.TRIE) .build(); allIndexParams.add(idIndex); + IndexParam idx_cach_type = IndexParam.builder() + .fieldName("cachType") + .indexName("idx_cach_type") + .indexType(IndexParam.IndexType.TRIE) + .build(); + allIndexParams.add(idx_cach_type); + IndexParam sKey = IndexParam.builder() .fieldName("sKey") .indexName("s_key") - .indexType(IndexParam.IndexType.INVERTED) + .indexType(IndexParam.IndexType.TRIE) .build(); allIndexParams.add(sKey); diff --git a/src/main/java/com/xly/milvus/service/impl/MilvusServiceImpl.java b/src/main/java/com/xly/milvus/service/impl/MilvusServiceImpl.java index e09b5b3..8c4ebcb 100644 --- a/src/main/java/com/xly/milvus/service/impl/MilvusServiceImpl.java +++ b/src/main/java/com/xly/milvus/service/impl/MilvusServiceImpl.java @@ -264,24 +264,29 @@ public class MilvusServiceImpl implements MilvusService { * @Description 返回组装动态内容 **/ @Override - public Map getMilvusFiled(String sVectorfiled,String sVectorfiledAll){ + public Map getMilvusFiled(String sVectorfiled,String sVectorfiledAll,String sVectorfiledShow){ List sFileds = new ArrayList<>(); + List filedsShow = new ArrayList<>(); List sFiledDescriptions = new ArrayList<>(); List sFiledDescriptionsAll = new ArrayList<>(); List> titleList = new LinkedList<>(); String[] sVectorfiledArray = sVectorfiled.split(","); for(String sVectorfiledOne : sVectorfiledArray){ - Map title = new HashMap<>(); - String[] sVectorfiledOneArray = sVectorfiledOne.split(":"); String sDescriptions = sVectorfiledOneArray[0]; String sName = sVectorfiledOneArray[1]; sFileds.add(sName); // 处理描述中可能包含的换行,保持缩进一致 -// String formattedDesc = sDescriptions.replace("\n", "\n "); -// sFiledDescriptions.add(String.format(" - %s: %s", sName, formattedDesc)); String formattedDesc =String.format("%s: %s", sName, sDescriptions); sFiledDescriptions.add(formattedDesc); + } + String[] sVectorfiledShowArray = sVectorfiledShow.split(","); + for(String sVectorfiledShowOne : sVectorfiledShowArray){ + Map title = new HashMap<>(4); + String[] sVectorfiledOneArray = sVectorfiledShowOne.split(":"); + String sDescriptions = sVectorfiledOneArray[0]; + String sName = sVectorfiledOneArray[1]; + filedsShow.add(sName); title.put("sName",sName); title.put("sTitle",sDescriptions); titleList.add(title); @@ -291,9 +296,6 @@ public class MilvusServiceImpl implements MilvusService { String[] sVectorfiledOneArray = sVectorfiledOne.split(":"); String sDescriptions = sVectorfiledOneArray[0]; String sName = sVectorfiledOneArray[1]; - // 处理描述中可能包含的换行,保持缩进一致 -// String formattedDesc = sDescriptions.replace("\n", "\n "); -// sFiledDescriptions.add(String.format(" - %s: %s", sName, formattedDesc)); String formattedDesc =String.format("%s: %s", sName, sDescriptions); sFiledDescriptionsAll.add(formattedDesc); } @@ -301,7 +303,7 @@ public class MilvusServiceImpl implements MilvusService { rMap.put("sMilvusFiled", String.join(",", sFileds)); rMap.put("sMilvusFiledDescription", String.join(",", sFiledDescriptions)); rMap.put("sMilvusFiledDescriptionAll", String.join(",", sFiledDescriptionsAll)); - rMap.put("sFileds", sFileds); + rMap.put("filedsShow", filedsShow); rMap.put("title", titleList); return rMap; } @@ -334,9 +336,6 @@ public class MilvusServiceImpl implements MilvusService { fields.add("metadata"); // 3. 创建 Milvus FloatVec 对象 FloatVec floatVec = new FloatVec(floatArray); - log.info("查询向量库条件{}",milvusFilter); - milvusFilter = isValidMilvusFilter(milvusFilter)?milvusFilter : null; - log.info("实际查询向量库条件{}",milvusFilter); // 4. 构建搜索请求 SearchReq searchReq = SearchReq.builder() .collectionName(collectionName) @@ -357,7 +356,7 @@ public class MilvusServiceImpl implements MilvusService { /** - * 判断 Milvus 过滤条件是否有效 + * 判断 Milvus 过滤条件是否有效(支持 TEXT_MATCH 全文检索) * @param milvusFilter 过滤条件字符串 * @return true: 有效条件, false: 无效条件 */ @@ -374,42 +373,135 @@ public class MilvusServiceImpl implements MilvusService { return false; } - // 3. 检查是否包含有效的操作符 - boolean hasValidOperator = filter.matches(".*[=!<>]=?.*") || filter.contains(" like "); + // 3. 【修改】检查是否包含有效的操作符(增加 TEXT_MATCH 支持) + boolean hasValidOperator = filter.matches(".*[=!<>]=?.*") + || filter.contains(" like ") + || filter.toUpperCase().contains("TEXT_MATCH"); + if (!hasValidOperator) { return false; } - // 4. 检查字符串值是否使用单引号包裹 - // 匹配模式:字段名 操作符 '值' - Pattern pattern = Pattern.compile("[a-zA-Z_][a-zA-Z0-9_]*\\s*(==|!=|>=|<=|>|<|like)\\s*('[^']*'|\\d+)"); - Matcher matcher = pattern.matcher(filter); - - // 5. 对于复合条件,递归检查 + // 4. 对于复合条件,递归检查 if (filter.contains("&&") || filter.contains("||")) { // 分割复合条件(简单处理,生产环境需要更完善的解析) - String[] conditions = filter.split("&&|\\|\\|"); + String[] conditions = splitConditions(filter); for (String condition : conditions) { - condition = condition.trim().replaceAll("^[()]+|[()]+$", ""); // 去除括号 - if (!isValidSimpleCondition(condition)) { + if (!isValidCondition(condition)) { return false; } } return true; } - // 6. 检查简单条件 - return isValidSimpleCondition(filter); + // 5. 检查单个条件 + return isValidCondition(filter); + } + + /** + * 拆分复合条件(处理括号嵌套) + */ + private String[] splitConditions(String filter) { + List conditions = new ArrayList<>(); + StringBuilder current = new StringBuilder(); + int parentheses = 0; + + for (int i = 0; i < filter.length(); i++) { + char c = filter.charAt(i); + + if (c == '(') { + parentheses++; + current.append(c); + } else if (c == ')') { + parentheses--; + current.append(c); + } else if (parentheses == 0 && (filter.startsWith("&&", i) || filter.startsWith("||", i))) { + // 遇到顶层操作符,分割条件 + if (current.length() > 0) { + conditions.add(current.toString().trim()); + current = new StringBuilder(); + } + i += 1; // 跳过操作符的第二个字符 + } else { + current.append(c); + } + } + + if (current.length() > 0) { + conditions.add(current.toString().trim()); + } + + return conditions.toArray(new String[0]); } /** - * 验证简单条件(不包含 && 和 ||) + * 验证单个条件(支持 TEXT_MATCH 和普通条件) + */ + private boolean isValidCondition(String condition) { + if (condition == null || condition.trim().isEmpty()) { + return false; + } + + condition = condition.trim(); + // 去除外层括号 + while (condition.startsWith("(") && condition.endsWith(")")) { + condition = condition.substring(1, condition.length() - 1).trim(); + } + + // 1. 【新增】检查 TEXT_MATCH 语法 + if (condition.toUpperCase().contains("TEXT_MATCH")) { + return isValidTextMatch(condition); + } + + // 2. 检查普通条件 + return isValidSimpleCondition(condition); + } + + /** + * 【新增】验证 TEXT_MATCH 语法 + * 格式:TEXT_MATCH(字段名, '关键词') + * 或:TEXT_MATCH(字段名, "关键词") + */ + private boolean isValidTextMatch(String condition) { + // 匹配 TEXT_MATCH(字段名, '关键词') 或 TEXT_MATCH(字段名, "关键词") + Pattern pattern = Pattern.compile( + "TEXT_MATCH\\s*\\(\\s*([a-zA-Z_][a-zA-Z0-9_]*)\\s*,\\s*['\"]([^'\"]*)['\"]\\s*\\)", + Pattern.CASE_INSENSITIVE + ); + Matcher matcher = pattern.matcher(condition); + + if (!matcher.matches()) { + log.warn("无效的 TEXT_MATCH 语法: {}", condition); + return false; + } + + String fieldName = matcher.group(1); + String keyword = matcher.group(2); + + // 检查字段名不能为空 + if (fieldName == null || fieldName.trim().isEmpty()) { + log.warn("TEXT_MATCH 字段名不能为空: {}", condition); + return false; + } + + // 检查关键词不能为空 + if (keyword == null || keyword.trim().isEmpty()) { + log.warn("TEXT_MATCH 关键词不能为空: {}", condition); + return false; + } + + return true; + } + + /** + * 验证简单条件(不包含 && 和 ||,不包含 TEXT_MATCH) */ private boolean isValidSimpleCondition(String condition) { if (condition == null || condition.trim().isEmpty()) { return false; } condition = condition.trim(); + // 匹配简单条件的正则 // 格式:字段名 操作符 值 // 字段名:字母开头,包含字母数字下划线 @@ -422,22 +514,27 @@ public class MilvusServiceImpl implements MilvusService { if (!condition.matches(regex)) { return false; } - // 额外检查:like 操作符的值必须包含 % + + // 【修改】额外检查:like 操作符的限制 if (condition.contains(" like ")) { String value = condition.split("like")[1].trim(); if (!value.contains("%")) { - return false; // like 必须使用 % 通配符 + log.warn("like 操作符必须包含 % 通配符: {}", condition); + return false; + } + + // 【新增】检查是否包含前后都有通配符的模式(Milvus 不支持) + if (value.matches("'%.*%'")) { + log.warn("Milvus 不支持前后都有通配符的 like: {}", condition); + return false; } } + return true; } - - /** - * 处理 Milvus 查询结果(完整版) - */ /** - * 处理 Milvus 查询结果(完整版) + * 处理 Milvus 查询结果 */ private List> processMilvusResults(SearchResp response) { List> results = new ArrayList<>(); @@ -758,6 +855,7 @@ public class MilvusServiceImpl implements MilvusService { .description(sDescription) .isPrimaryKey(false) // 如果不是主键 .isNullable(true) // 允许为空 + // .defaultValue("") // 如果有默认值 // SQL可能较长 .maxLength(1000) diff --git a/src/main/java/com/xly/ocr/service/OcrService.java b/src/main/java/com/xly/ocr/service/OcrService.java new file mode 100644 index 0000000..5641e4e --- /dev/null +++ b/src/main/java/com/xly/ocr/service/OcrService.java @@ -0,0 +1,530 @@ +package com.xly.ocr.service; + +import net.sourceforge.tess4j.Tesseract; +import net.sourceforge.tess4j.TesseractException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; +import org.springframework.web.multipart.MultipartFile; + +import javax.imageio.ImageIO; +import java.awt.*; +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.List; + +@Service +public class OcrService { + + private static final Logger logger = LoggerFactory.getLogger(OcrService.class); + + private final Tesseract tesseract; + + // 配置参数 + private static final List ALLOWED_EXTENSIONS = Arrays.asList(".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".gif"); + private static final long MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB + private static final int BINARIZE_THRESHOLD = 127; + private static final int MIN_WIDTH = 800; + private static final int MIN_HEIGHT = 200; + + // 性能统计 + private static class OcrStats { + long preprocessTime = 0; + long ocrTime = 0; + String imageSize = ""; + + @Override + public String toString() { + return String.format("预处理耗时: %dms, OCR耗时: %dms, 图片尺寸: %s", + preprocessTime, ocrTime, imageSize); + } + } + + public OcrService(@Value("${tesseract.datapath}") String dataPath) { + this.tesseract = new Tesseract(); + + // 基础配置 + this.tesseract.setDatapath(dataPath); + this.tesseract.setLanguage("chi_sim+eng"); + + // 优化识别参数 + configureTesseract(); + + logger.info("Tesseract 初始化完成,语言包路径: {}, 语言: chi_sim+eng", dataPath); + } + + /** + * 配置 Tesseract 参数 + */ + private void configureTesseract() { + // 页面分割模式:3 = 自动页面分割,但没有方向检测 + this.tesseract.setPageSegMode(3); + + // OCR 引擎模式:3 = 默认,基于 LSTM 和传统引擎 + this.tesseract.setOcrEngineMode(3); + + // 提高中文识别率 + this.tesseract.setVariable("preserve_interword_spaces", "1"); + this.tesseract.setVariable("textord_force_make_prop_words", "true"); + + // 可选:设置字符白名单(根据需要启用) + // this.tesseract.setVariable("tessedit_char_whitelist", + // "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ,。!?;:\"‘’“”【】()《》"); + + // 可选:设置黑名单(排除干扰字符) + // this.tesseract.setVariable("tessedit_char_blacklist", "|\\/`~@#$%^&*()_+={}[]"); + } + + /** + * 图片预处理 - 优化的处理流程 + */ + private BufferedImage preprocessImage(BufferedImage originalImage) { + if (originalImage == null) { + return null; + } + + try { + long startTime = System.currentTimeMillis(); + + // 1. 自动调整亮度和对比度 + BufferedImage adjusted = autoAdjustBrightnessContrast(originalImage); + + // 2. 灰度化 + BufferedImage grayImage = toGray(adjusted); + + // 3. 自适应二值化(比固定阈值更好) + BufferedImage binaryImage = adaptiveBinarize(grayImage); + + // 4. 降噪处理 + BufferedImage denoisedImage = denoise(binaryImage); + + // 5. 放大图片(如果太小) + BufferedImage scaledImage = scaleImageIfNeeded(denoisedImage); + + // 6. 可选:边缘增强(提高清晰度) + BufferedImage enhancedImage = sharpen(scaledImage); + + long endTime = System.currentTimeMillis(); + logger.debug("图片预处理耗时: {}ms", endTime - startTime); + + return enhancedImage; + + } catch (Exception e) { + logger.error("图片预处理失败: {}", e.getMessage(), e); + return originalImage; + } + } + + /** + * 自动调整亮度和对比度 + */ + private BufferedImage autoAdjustBrightnessContrast(BufferedImage image) { + BufferedImage result = new BufferedImage( + image.getWidth(), image.getHeight(), image.getType()); + + // 计算亮度直方图 + int[] histogram = new int[256]; + for (int y = 0; y < image.getHeight(); y++) { + for (int x = 0; x < image.getWidth(); x++) { + int rgb = image.getRGB(x, y); + int gray = (int)((rgb >> 16 & 0xFF) * 0.299 + + (rgb >> 8 & 0xFF) * 0.587 + + (rgb & 0xFF) * 0.114); + histogram[gray]++; + } + } + + // 找到黑色和白色的阈值 + int total = image.getWidth() * image.getHeight(); + int blackThreshold = 0; + int whiteThreshold = 255; + + int sum = 0; + for (int i = 0; i < 256; i++) { + sum += histogram[i]; + if (sum > total * 0.05) { + blackThreshold = i; + break; + } + } + + sum = 0; + for (int i = 255; i >= 0; i--) { + sum += histogram[i]; + if (sum > total * 0.05) { + whiteThreshold = i; + break; + } + } + + // 应用对比度拉伸 + for (int y = 0; y < image.getHeight(); y++) { + for (int x = 0; x < image.getWidth(); x++) { + int rgb = image.getRGB(x, y); + int r = (rgb >> 16) & 0xFF; + int g = (rgb >> 8) & 0xFF; + int b = rgb & 0xFF; + + // 拉伸到 0-255 范围 + r = stretchValue(r, blackThreshold, whiteThreshold); + g = stretchValue(g, blackThreshold, whiteThreshold); + b = stretchValue(b, blackThreshold, whiteThreshold); + + result.setRGB(x, y, (r << 16) | (g << 8) | b); + } + } + + return result; + } + + private int stretchValue(int value, int black, int white) { + if (value <= black) return 0; + if (value >= white) return 255; + return (value - black) * 255 / (white - black); + } + + /** + * 灰度化 + */ + private BufferedImage toGray(BufferedImage image) { + BufferedImage result = new BufferedImage( + image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY); + Graphics g = result.getGraphics(); + g.drawImage(image, 0, 0, null); + g.dispose(); + return result; + } + + /** + * 自适应二值化 - 根据局部区域动态调整阈值 + */ + private BufferedImage adaptiveBinarize(BufferedImage image) { + BufferedImage result = new BufferedImage( + image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY); + + int blockSize = 15; + int constant = 5; + + for (int y = 0; y < image.getHeight(); y++) { + for (int x = 0; x < image.getWidth(); x++) { + // 计算局部区域的平均值 + int sum = 0; + int count = 0; + for (int ky = -blockSize/2; ky <= blockSize/2; ky++) { + for (int kx = -blockSize/2; kx <= blockSize/2; kx++) { + int px = Math.min(Math.max(x + kx, 0), image.getWidth() - 1); + int py = Math.min(Math.max(y + ky, 0), image.getHeight() - 1); + sum += new Color(image.getRGB(px, py)).getRed(); + count++; + } + } + int threshold = sum / count - constant; + + // 应用阈值 + int gray = new Color(image.getRGB(x, y)).getRed(); + int binary = gray > threshold ? 255 : 0; + result.setRGB(x, y, new Color(binary, binary, binary).getRGB()); + } + } + return result; + } + + /** + * 降噪 - 优化的中值滤波 + */ + private BufferedImage denoise(BufferedImage image) { + BufferedImage result = new BufferedImage( + image.getWidth(), image.getHeight(), image.getType()); + + for (int y = 1; y < image.getHeight() - 1; y++) { + for (int x = 1; x < image.getWidth() - 1; x++) { + int[] neighbors = new int[9]; + int index = 0; + for (int ky = -1; ky <= 1; ky++) { + for (int kx = -1; kx <= 1; kx++) { + neighbors[index++] = new Color(image.getRGB(x + kx, y + ky)).getRed(); + } + } + Arrays.sort(neighbors); + int median = neighbors[4]; + result.setRGB(x, y, new Color(median, median, median).getRGB()); + } + } + + // 处理边缘 + for (int x = 0; x < image.getWidth(); x++) { + result.setRGB(x, 0, image.getRGB(x, 0)); + result.setRGB(x, image.getHeight() - 1, image.getRGB(x, image.getHeight() - 1)); + } + for (int y = 0; y < image.getHeight(); y++) { + result.setRGB(0, y, image.getRGB(0, y)); + result.setRGB(image.getWidth() - 1, y, image.getRGB(image.getWidth() - 1, y)); + } + + return result; + } + + /** + * 锐化处理 - 增强文字边缘 + */ + private BufferedImage sharpen(BufferedImage image) { + BufferedImage result = new BufferedImage( + image.getWidth(), image.getHeight(), image.getType()); + + // 拉普拉斯锐化核 + float[] sharpenKernel = { + 0, -1, 0, + -1, 5, -1, + 0, -1, 0 + }; + + for (int y = 1; y < image.getHeight() - 1; y++) { + for (int x = 1; x < image.getWidth() - 1; x++) { + int sum = 0; + int index = 0; + for (int ky = -1; ky <= 1; ky++) { + for (int kx = -1; kx <= 1; kx++) { + int gray = new Color(image.getRGB(x + kx, y + ky)).getRed(); + sum += gray * sharpenKernel[index++]; + } + } + sum = Math.min(255, Math.max(0, sum)); + result.setRGB(x, y, new Color(sum, sum, sum).getRGB()); + } + } + + return result; + } + + /** + * 放大图片(如果图片太小) + */ + private BufferedImage scaleImageIfNeeded(BufferedImage image) { + int width = image.getWidth(); + int height = image.getHeight(); + + if (width >= MIN_WIDTH && height >= MIN_HEIGHT) { + return image; + } + + double scaleX = (double) MIN_WIDTH / width; + double scaleY = (double) MIN_HEIGHT / height; + double scale = Math.max(scaleX, scaleY); + + int newWidth = (int) (width * scale); + int newHeight = (int) (height * scale); + + // 使用更好的插值算法 + BufferedImage result = new BufferedImage(newWidth, newHeight, image.getType()); + Graphics2D g2d = result.createGraphics(); + g2d.setRenderingHint(RenderingHints.KEY_INTERPOLATION, + RenderingHints.VALUE_INTERPOLATION_BICUBIC); + g2d.setRenderingHint(RenderingHints.KEY_RENDERING, + RenderingHints.VALUE_RENDER_QUALITY); + g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, + RenderingHints.VALUE_ANTIALIAS_ON); + g2d.drawImage(image, 0, 0, newWidth, newHeight, null); + g2d.dispose(); + + logger.debug("图片已放大: {}x{} -> {}x{}", width, height, newWidth, newHeight); + return result; + } + + /** + * 识别图片中的文字(增强版) + */ + public String extractText(File imageFile) { + if (imageFile == null || !imageFile.exists()) { + logger.error("图片文件不存在或为空"); + return "图片文件不存在"; + } + + OcrStats stats = new OcrStats(); + + try { + logger.info("开始识别图片: {}, 大小: {} bytes", + imageFile.getAbsolutePath(), imageFile.length()); + + // 读取原始图片 + long readStart = System.currentTimeMillis(); + BufferedImage originalImage = ImageIO.read(imageFile); + if (originalImage == null) { + return "无法读取图片文件,请确保图片格式正确"; + } + stats.imageSize = originalImage.getWidth() + "x" + originalImage.getHeight(); + + // 图片预处理 + long preprocessStart = System.currentTimeMillis(); + BufferedImage processedImage = preprocessImage(originalImage); + stats.preprocessTime = System.currentTimeMillis() - preprocessStart; + + // 可选:保存预处理图片用于调试(生产环境可注释) + if (logger.isDebugEnabled()) { + saveDebugImage(processedImage, imageFile); + } + + // 执行 OCR + long ocrStart = System.currentTimeMillis(); + String result = tesseract.doOCR(processedImage); + stats.ocrTime = System.currentTimeMillis() - ocrStart; + + logger.info("识别完成 - {}", stats); + + // 清理识别结果 + result = cleanResult(result); + + if (result.isEmpty()) { + logger.warn("识别结果为空,可能需要调整预处理参数"); + } + + return result; + + } catch (TesseractException e) { + logger.error("OCR识别失败: {}", e.getMessage(), e); + return "OCR识别失败: " + e.getMessage(); + } catch (IOException e) { + logger.error("读取图片失败: {}", e.getMessage(), e); + return "读取图片失败: " + e.getMessage(); + } + } + + /** + * 保存调试图片(仅用于调试) + */ + private void saveDebugImage(BufferedImage image, File originalFile) { + try { + String debugPath = originalFile.getParent() + "/debug_" + originalFile.getName(); + File debugFile = new File(debugPath); + ImageIO.write(image, "png", debugFile); + logger.debug("预处理图片已保存: {}", debugPath); + } catch (IOException e) { + logger.debug("保存调试图片失败: {}", e.getMessage()); + } + } + + /** + * 清理识别结果 + */ + private String cleanResult(String result) { + if (result == null || result.isEmpty()) { + return ""; + } + + // 去除首尾空白 + result = result.trim(); + + // 规范化换行符 + result = result.replaceAll("\\r\\n", "\n") + .replaceAll("\\r", "\n"); + + // 合并多个空行 + result = result.replaceAll("\n{3,}", "\n\n"); + + // 去除行首行尾空格 + String[] lines = result.split("\n"); + StringBuilder cleaned = new StringBuilder(); + for (String line : lines) { + cleaned.append(line.trim()).append("\n"); + } + + return cleaned.toString().trim(); + } + + /** + * 封装方法,接收上传的 MultipartFile + */ + public String extractTextFromMultipartFile(MultipartFile file) { + if (file == null || file.isEmpty()) { + logger.warn("上传的文件为空"); + return "上传的文件为空"; + } + + // 验证文件大小 + if (file.getSize() > MAX_FILE_SIZE) { + logger.warn("文件过大: {} bytes, 超过限制: {} bytes", + file.getSize(), MAX_FILE_SIZE); + return String.format("文件过大,最大支持 %dMB", MAX_FILE_SIZE / 1024 / 1024); + } + + // 验证文件格式 + String originalFilename = file.getOriginalFilename(); + if (originalFilename != null && !isAllowedImage(originalFilename)) { + logger.warn("不支持的文件格式: {}", originalFilename); + return "不支持的文件格式,仅支持: " + String.join(", ", ALLOWED_EXTENSIONS); + } + + Path tempFile = null; + try { + // 创建临时文件 + String suffix = getFileExtension(originalFilename); + tempFile = Files.createTempFile("ocr_", suffix); + file.transferTo(tempFile.toFile()); + + logger.info("临时文件创建成功: {}", tempFile); + + // 执行 OCR + String result = extractText(tempFile.toFile()); + + return result; + + } catch (IOException e) { + logger.error("文件处理失败: {}", e.getMessage(), e); + return "文件处理失败: " + e.getMessage(); + } finally { + // 清理临时文件 + cleanupTempFile(tempFile); + } + } + + /** + * 清理临时文件 + */ + private void cleanupTempFile(Path tempFile) { + if (tempFile != null) { + try { + Files.deleteIfExists(tempFile); + logger.debug("临时文件已删除: {}", tempFile); + } catch (IOException e) { + logger.warn("删除临时文件失败: {}", tempFile, e); + // 注册JVM退出时删除 + tempFile.toFile().deleteOnExit(); + } + } + } + + /** + * 批量识别(用于多张图片) + */ + public List batchExtractText(List files) { + return files.stream() + .map(this::extractTextFromMultipartFile) + .collect(java.util.stream.Collectors.toList()); + } + + /** + * 检查文件扩展名是否允许 + */ + private boolean isAllowedImage(String filename) { + if (filename == null) { + return false; + } + String lowerFilename = filename.toLowerCase(); + return ALLOWED_EXTENSIONS.stream() + .anyMatch(lowerFilename::endsWith); + } + + /** + * 获取文件扩展名 + */ + private String getFileExtension(String filename) { + if (filename == null || !filename.contains(".")) { + return ".jpg"; + } + return filename.substring(filename.lastIndexOf(".")); + } +} \ No newline at end of file diff --git a/src/main/java/com/xly/ocr/test/Main.java b/src/main/java/com/xly/ocr/test/Main.java new file mode 100644 index 0000000..43d4d7b --- /dev/null +++ b/src/main/java/com/xly/ocr/test/Main.java @@ -0,0 +1,259 @@ +package com.xly.ocr.test; + +import com.benjaminwan.ocrlibrary.OcrResult; +import com.benjaminwan.ocrlibrary.Point; +import com.benjaminwan.ocrlibrary.TextBlock; +import io.github.mymonstercat.Model; +import io.github.mymonstercat.ocr.InferenceEngine; +import io.github.mymonstercat.ocr.config.ParamConfig; + +import javax.imageio.ImageIO; +import java.awt.*; +import java.awt.image.BufferedImage; +import java.awt.image.Kernel; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class Main { + + static { + try { + String customTempDir = "D:/temp/ocrJava"; + File tempDir = new File(customTempDir); + if (!tempDir.exists()) { + tempDir.mkdirs(); + } + System.setProperty("java.io.tmpdir", customTempDir); + System.setProperty("TMP", customTempDir); + System.setProperty("TEMP", customTempDir); + System.setProperty("ORT_TMP_DIR", customTempDir); + + System.out.println("=================================="); + System.out.println("java.io.tmpdir: " + System.getProperty("java.io.tmpdir")); + System.out.println("临时目录是否存在: " + tempDir.exists()); + System.out.println("=================================="); + + } catch (Exception e) { + System.err.println("设置临时目录失败: " + e.getMessage()); + e.printStackTrace(); + } + } + + public static void main(String[] args) { + try { + System.out.println("OCR 程序开始执行..."); + + // 检查旧的临时目录 + String oldTempPath = "C:\\Users\\钱豹\\AppData\\Local\\Temp\\ocrJava"; + File oldTempDir = new File(oldTempPath); + if (oldTempDir.exists()) { + System.out.println("发现旧的临时目录: " + oldTempPath); + } + + // 1. 初始化引擎(V4 模型) + System.out.println("正在初始化 OCR 引擎..."); + InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4); + + // 2. 创建优化的参数配置 + ParamConfig config = createOptimizedParamConfig(); + + // 3. 设置图片路径 + String imagePath = "E:/aa/b.jpg"; + + File imageFile = new File(imagePath); + if (!imageFile.exists()) { + System.err.println("图片文件不存在: " + imagePath); + return; + } + + // 4. 图像预处理(可选,注释掉可提高速度) + System.out.println("正在进行图像预处理..."); + String processedImagePath = preprocessImage(imagePath); + + // 5. 执行识别 + System.out.println("开始识别图片: " + processedImagePath); + long startTime = System.currentTimeMillis(); + OcrResult ocrResult = engine.runOcr(processedImagePath, config); + long endTime = System.currentTimeMillis(); + + // 6. 输出识别结果 + String text = ocrResult.getStrRes().trim(); + System.out.println("=================================="); + System.out.println("识别结果:"); + System.out.println(text); + System.out.println("=================================="); + System.out.println("识别耗时: " + (endTime - startTime) + " ms"); + + // 7. 输出文本块详细信息(可选,用于调试) + if (ocrResult.getTextBlocks() != null && !ocrResult.getTextBlocks().isEmpty()) { + System.out.println("\n文本块详情(共" + ocrResult.getTextBlocks().size() + "块):"); + List textBlocks = ocrResult.getTextBlocks(); + for (int i = 0; i < textBlocks.size(); i++) { + TextBlock block = textBlocks.get(i); + System.out.printf(" 块%d: %s (置信度: %.2f)%n", + i + 1, + block.getText(), + block.getBoxScore() + ); + } + } + + // 8. 清理临时文件 + if (!processedImagePath.equals(imagePath)) { + File processedFile = new File(processedImagePath); + if (processedFile.exists()) { + processedFile.delete(); + System.out.println("\n已清理临时文件: " + processedImagePath); + } + } + + } catch (Exception e) { + System.err.println("OCR 识别失败: " + e.getMessage()); + e.printStackTrace(); + } + } + + private static ParamConfig createOptimizedParamConfig() { + ParamConfig config = new ParamConfig(); + + // 手写体专用超强参数 + config.setPadding(100); + config.setMaxSideLen(1200); + + // 极低阈值 = 不漏检任何手写文字 + config.setBoxScoreThresh(0.25f); + config.setBoxThresh(0.15f); + config.setUnClipRatio(2.5f); + + // 必须开启角度矫正 + config.setDoAngle(true); + config.setMostAngle(true); + + return config; + } + + /** + * 图像预处理:提高OCR识别准确度 + */ + private static String preprocessImage(String imagePath) throws IOException { + File inputFile = new File(imagePath); + BufferedImage originalImage = ImageIO.read(inputFile); + + if (originalImage == null) return imagePath; + + // 固定缩放到最佳尺寸 + BufferedImage resized = resizeImageWithQuality(originalImage, 1000, 1500); + + // 手写体必须:二值化(黑白强化)+ 锐化 + BufferedImage binary = toBinary(resized); // 黑白强化,核心! + BufferedImage sharp = sharpenImage(binary); // 锐化 + + // 保存 + String outputPath = imagePath.substring(0, imagePath.lastIndexOf(".")) + "_final.jpg"; + ImageIO.write(sharp, "jpg", new File(outputPath)); + return outputPath; + } + + // 手写体专用:黑白二值化,文字瞬间清晰 + private static BufferedImage toBinary(BufferedImage image) { + BufferedImage gray = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY); + Graphics g = gray.getGraphics(); + g.drawImage(image, 0, 0, null); + g.dispose(); + + // 手写体阈值 120,文字最清晰 + int threshold = 120; + for (int y = 0; y < gray.getHeight(); y++) { + for (int x = 0; x < gray.getWidth(); x++) { + int rgb = gray.getRGB(x, y); + int grayValue = (rgb >> 16) & 0xFF; + if (grayValue < threshold) { + gray.setRGB(x, y, Color.BLACK.getRGB()); + } else { + gray.setRGB(x, y, Color.WHITE.getRGB()); + } + } + } + return gray; + } + + /** + * 高质量调整图片大小 + */ + private static BufferedImage resizeImageWithQuality(BufferedImage original, int maxWidth, int maxHeight) { + int width = original.getWidth(); + int height = original.getHeight(); + + // 如果图片尺寸合适,不进行调整 + if (width <= maxWidth && height <= maxHeight) { + return original; + } + + // 计算缩放比例 + double ratio = Math.min((double) maxWidth / width, (double) maxHeight / height); + int newWidth = (int) (width * ratio); + int newHeight = (int) (height * ratio); + + // 创建缩放后的图片 + BufferedImage resized = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB); + Graphics2D g = resized.createGraphics(); + g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC); + g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY); + g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON); + g.drawImage(original, 0, 0, newWidth, newHeight, null); + g.dispose(); + + System.out.println("图片已缩放: " + width + "x" + height + " -> " + newWidth + "x" + newHeight); + return resized; + } + + /** + * 增强对比度 + */ + private static BufferedImage enhanceContrast(BufferedImage image) { + BufferedImage result = new BufferedImage(image.getWidth(), image.getHeight(), image.getType()); + + for (int y = 0; y < image.getHeight(); y++) { + for (int x = 0; x < image.getWidth(); x++) { + Color color = new Color(image.getRGB(x, y)); + + // 增强对比度 + int red = (int) (color.getRed() * 1.2); + int green = (int) (color.getGreen() * 1.2); + int blue = (int) (color.getBlue() * 1.2); + + // 限制RGB值范围 + red = Math.min(255, Math.max(0, red)); + green = Math.min(255, Math.max(0, green)); + blue = Math.min(255, Math.max(0, blue)); + + result.setRGB(x, y, new Color(red, green, blue).getRGB()); + } + } + + System.out.println("对比度已增强"); + return result; + } + + /** + * 锐化图像(可选,使文字边缘更清晰) + */ + private static BufferedImage sharpenImage(BufferedImage image) { + BufferedImage result = new BufferedImage(image.getWidth(), image.getHeight(), image.getType()); + + float[] sharpenKernel = { + 0, -1, 0, + -1, 5, -1, + 0, -1, 0 + }; + + Kernel kernel = new Kernel(3, 3, sharpenKernel); + java.awt.image.ConvolveOp op = new java.awt.image.ConvolveOp(kernel, java.awt.image.ConvolveOp.EDGE_NO_OP, null); + op.filter(image, result); + + System.out.println("图像已锐化"); + return result; + } +} \ No newline at end of file diff --git a/src/main/java/com/xly/ocr/test/Test.java b/src/main/java/com/xly/ocr/test/Test.java new file mode 100644 index 0000000..c08789e --- /dev/null +++ b/src/main/java/com/xly/ocr/test/Test.java @@ -0,0 +1,182 @@ +package com.xly.ocr.test; + +import com.benjaminwan.ocrlibrary.OcrResult; +import com.benjaminwan.ocrlibrary.TextBlock; +import io.github.mymonstercat.Model; +import io.github.mymonstercat.ocr.InferenceEngine; +import io.github.mymonstercat.ocr.config.ParamConfig; + +import javax.imageio.ImageIO; +import java.awt.*; +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.IOException; +import java.util.List; + +public class Test { + + static { + try { + String customTempDir = "D:/temp/ocrJava"; + File tempDir = new File(customTempDir); + if (!tempDir.exists()) { + tempDir.mkdirs(); + } + System.setProperty("java.io.tmpdir", customTempDir); + System.setProperty("TMP", customTempDir); + System.setProperty("TEMP", customTempDir); + + System.out.println("=================================="); + System.out.println("临时目录: " + System.getProperty("java.io.tmpdir")); + System.out.println("=================================="); + + } catch (Exception e) { + System.err.println("设置临时目录失败: " + e.getMessage()); + } + } + + public static void main(String[] args) { + try { + System.out.println("OCR 程序开始执行..."); + + // 1. 初始化引擎(使用 v4 模型) + System.out.println("正在初始化 OCR 引擎 (PP-OCRv4)..."); + InferenceEngine engine = InferenceEngine.getInstance(Model.ONNX_PPOCR_V4); + + // 2. 创建优化的参数配置 + ParamConfig config = createOptimizedParamConfig(); + + // 3. 图片路径 + String imagePath = "E:/aa/b.jpg"; + File imageFile = new File(imagePath); + if (!imageFile.exists()) { + System.err.println("图片文件不存在: " + imagePath); + return; + } + + // 4. 图像预处理(直接处理原图,不保存临时文件) + System.out.println("正在进行图像预处理..."); + BufferedImage processedImage = preprocessImage(imageFile); + + // 5. 保存预处理后的图片到临时目录 + String processedImagePath = "D:/temp/ocrJava/processed_" + System.currentTimeMillis() + ".png"; + ImageIO.write(processedImage, "png", new File(processedImagePath)); + System.out.println("预处理图片已保存: " + processedImagePath); + + // 6. 执行识别 + System.out.println("开始识别图片..."); + long startTime = System.currentTimeMillis(); + OcrResult ocrResult = engine.runOcr(processedImagePath, config); + long endTime = System.currentTimeMillis(); + + // 7. 输出结果 + String text = ocrResult.getStrRes().trim(); + System.out.println("\n=================================="); + System.out.println("识别结果:"); + System.out.println(text); + System.out.println("=================================="); + System.out.println("识别耗时: " + (endTime - startTime) + " ms"); + + // 8. 输出每个文本块 + if (ocrResult.getTextBlocks() != null && !ocrResult.getTextBlocks().isEmpty()) { + System.out.println("\n文本块详情(共" + ocrResult.getTextBlocks().size() + "块):"); + List textBlocks = ocrResult.getTextBlocks(); + for (int i = 0; i < textBlocks.size(); i++) { + TextBlock block = textBlocks.get(i); + System.out.printf(" 块%d: %s (置信度: %.2f)%n", + i + 1, + block.getText(), + block.getBoxScore() + ); + } + } + + // 9. 清理临时文件 + new File(processedImagePath).delete(); + + } catch (Exception e) { + System.err.println("OCR 识别失败: " + e.getMessage()); + e.printStackTrace(); + } + } + + /** + * 优化的参数配置 + */ + private static ParamConfig createOptimizedParamConfig() { + ParamConfig config = new ParamConfig(); + + config.setPadding(50); + config.setMaxSideLen(0); + config.setBoxScoreThresh(0.4f); + config.setBoxThresh(0.25f); + config.setUnClipRatio(1.8f); + config.setDoAngle(true); + config.setMostAngle(true); + + return config; + } + + /** + * 图像预处理 - 直接返回处理后的 BufferedImage + */ + private static BufferedImage preprocessImage(File imageFile) throws IOException { + BufferedImage original = ImageIO.read(imageFile); + if (original == null) { + throw new IOException("无法读取图片: " + imageFile.getPath()); + } + + System.out.println("原始尺寸: " + original.getWidth() + "x" + original.getHeight()); + + BufferedImage processed = original; + + // 1. 如果图片太大,缩小尺寸 + if (processed.getWidth() > 2000 || processed.getHeight() > 2000) { + processed = resizeImage(processed, 1600, 1600); + } + + // 2. 增强对比度 + processed = enhanceContrast(processed); + + System.out.println("处理后尺寸: " + processed.getWidth() + "x" + processed.getHeight()); + + return processed; + } + + /** + * 调整图片大小 + */ + private static BufferedImage resizeImage(BufferedImage image, int maxWidth, int maxHeight) { + int w = image.getWidth(); + int h = image.getHeight(); + double ratio = Math.min((double) maxWidth / w, (double) maxHeight / h); + if (ratio >= 1) return image; + + int newW = (int) (w * ratio); + int newH = (int) (h * ratio); + + BufferedImage resized = new BufferedImage(newW, newH, BufferedImage.TYPE_INT_RGB); + Graphics2D g = resized.createGraphics(); + g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC); + g.drawImage(image, 0, 0, newW, newH, null); + g.dispose(); + return resized; + } + + /** + * 增强对比度 + */ + private static BufferedImage enhanceContrast(BufferedImage image) { + BufferedImage result = new BufferedImage(image.getWidth(), image.getHeight(), image.getType()); + for (int y = 0; y < image.getHeight(); y++) { + for (int x = 0; x < image.getWidth(); x++) { + Color c = new Color(image.getRGB(x, y)); + int r = Math.min(255, (int) (c.getRed() * 1.15)); + int g = Math.min(255, (int) (c.getGreen() * 1.15)); + int b = Math.min(255, (int) (c.getBlue() * 1.15)); + result.setRGB(x, y, new Color(r, g, b).getRGB()); + } + } + return result; + } +} \ No newline at end of file diff --git a/src/main/java/com/xly/ocr/web/OcrController.java b/src/main/java/com/xly/ocr/web/OcrController.java new file mode 100644 index 0000000..e56a99f --- /dev/null +++ b/src/main/java/com/xly/ocr/web/OcrController.java @@ -0,0 +1,42 @@ +//package com.xly.ocr.web; +// +//import com.xly.ocr.service.OcrService; +//import org.springframework.beans.factory.annotation.Autowired; +//import org.springframework.http.ResponseEntity; +//import org.springframework.web.bind.annotation.*; +//import org.springframework.web.multipart.MultipartFile; +// +//import java.util.HashMap; +//import java.util.List; +//import java.util.Map; +// +//@RestController +//@RequestMapping("/api/ocr") +//public class OcrController { +// +// @Autowired +// private OcrService ocrService; +// +// @PostMapping("/extract") +// public ResponseEntity> extractText( +// @RequestParam("file") MultipartFile file) { +// +// Map response = new HashMap<>(); +// long startTime = System.currentTimeMillis(); +// +// String result = ocrService.extractTextFromMultipartFile(file); +// +// response.put("text", result); +// response.put("time", System.currentTimeMillis() - startTime); +// response.put("success", !result.startsWith("错误") && !result.startsWith("失败")); +// +// return ResponseEntity.ok(response); +// } +// +// @PostMapping("/batch") +// public ResponseEntity> batchExtract( +// @RequestParam("files") List files) { +// List results = ocrService.batchExtractText(files); +// return ResponseEntity.ok(results); +// } +//} \ No newline at end of file diff --git a/src/main/java/com/xly/service/XlyErpService.java b/src/main/java/com/xly/service/XlyErpService.java index 0d175f0..3631231 100644 --- a/src/main/java/com/xly/service/XlyErpService.java +++ b/src/main/java/com/xly/service/XlyErpService.java @@ -161,13 +161,29 @@ public class XlyErpService { && ObjectUtil.isNotEmpty(session.getCurrentTool().getSInputTabelName()) && ObjectUtil.isNotEmpty(session.getCurrentTool().getSStructureMemo())) ){ - //查询是否走向量库 还是数据库查询 - Boolean isAggregation = aiAgent.routeQuery(session.getUserId(), input); + //查询缓存是否存在取缓存 直接走 + Map cachMap = getDynamicTableCach(session,userInput); + Boolean isAggregation; + Boolean bHasCach; + String sCleanSql = StrUtil.EMPTY; + if(ObjectUtil.isEmpty(cachMap)){ + //查询是否走向量库 还是数据库查询 + isAggregation = aiAgent.routeQuery(session.getUserId(), input); + session.setDbCach("D"); + bHasCach = false; + }else{ + isAggregation = "MYSQL".equals(cachMap.get("cachType")); + session.setDbCach("H"); + bHasCach = true; + sCleanSql = ObjectUtil.isNotEmpty(cachMap.get("sSqlContent"))?cachMap.get("sSqlContent").toString() : StrUtil.EMPTY; + } if(!isAggregation){ //获取常量库内容 - sResponMessage = getMilvus(session, input, aiAgent); + session.setDbType("X"); + sResponMessage = getMilvus(session, input, aiAgent,bHasCach); }else { - sResponMessage = getDynamicTableSql(session, input, userId, userInput,0,StrUtil.EMPTY,StrUtil.EMPTY,"0",StrUtil.EMPTY, aiAgent); + session.setDbType("G"); + sResponMessage = getDynamicTableSql(session, input, userId, userInput,0,StrUtil.EMPTY,StrUtil.EMPTY,"0",StrUtil.EMPTY, aiAgent,sCleanSql); } return Flux.just(AiResponseDTO.builder() .aiText(sResponMessage) @@ -235,8 +251,10 @@ public class XlyErpService { session = userSceneSessionService.getUserSceneSession(userId,sUserName,sBrandsId,sSubsidiaryId,sUserType,authorization); session.setAuthorization(authorization); session.setSFunPrompts(null); + session.setDbCach(StrUtil.EMPTY); + session.setDbType(StrUtil.EMPTY); + sceneName = ObjectUtil.isNotEmpty(session.getCurrentScene())?session.getCurrentScene().getSSceneName():StrUtil.EMPTY; - methodName = ObjectUtil.isNotEmpty(session.getCurrentTool())?session.getCurrentTool().getSMethodName():StrUtil.EMPTY; // 2. 特殊指令:重置场景(无论是否已选,都可重置) if (input.contains("重置") || input.contains("重新选择")) { //清除记忆缓存 @@ -268,7 +286,7 @@ public class XlyErpService { ){ sResponMessage = aiAgent.chat(userId, input); } - + methodName = ObjectUtil.isNotEmpty(session.getCurrentTool())?session.getCurrentTool().getSMethodName():StrUtil.EMPTY; if(ObjectUtil.isNotEmpty(session.getCurrentTool()) && !ObjectUtil.isNotEmpty(session.getCurrentTool().getSInputTabelName()) ){ @@ -283,28 +301,44 @@ public class XlyErpService { && ObjectUtil.isNotEmpty(session.getCurrentTool().getSInputTabelName()) && ObjectUtil.isNotEmpty(session.getCurrentTool().getSStructureMemo())) ){ - //查询是否走向量库 还是数据库查询 - Boolean isAggregation = aiAgent.routeQuery(session.getUserId(), input); + //查询缓存是否存在取缓存 直接走 + Map cachMap = getDynamicTableCach(session,userInput); + Boolean isAggregation; + Boolean bHasCach; + String sCleanSql = StrUtil.EMPTY; + if(ObjectUtil.isEmpty(cachMap)){ + //查询是否走向量库 还是数据库查询 + isAggregation = aiAgent.routeQuery(session.getUserId(), input); + session.setDbCach("D"); + bHasCach = false; + }else{ + isAggregation = "MYSQL".equals(cachMap.get("cachType")); + session.setDbCach("H"); + bHasCach = true; + sCleanSql = ObjectUtil.isNotEmpty(cachMap.get("sSqlContent"))?cachMap.get("sSqlContent").toString() : StrUtil.EMPTY; + } if(!isAggregation){ //获取常量库内容 - sResponMessage = getMilvus(session, input, aiAgent); + session.setDbType("X"); + sResponMessage = getMilvus(session, input, aiAgent,bHasCach); }else { - sResponMessage = getDynamicTableSql(session, input, userId, userInput,0,StrUtil.EMPTY,StrUtil.EMPTY,"0",StrUtil.EMPTY, aiAgent); + session.setDbType("G"); + sResponMessage = getDynamicTableSql(session, input, userId, userInput,0,StrUtil.EMPTY,StrUtil.EMPTY,"0",StrUtil.EMPTY, aiAgent,sCleanSql); } - return AiResponseDTO.builder().sSceneName(sceneName).sMethodName(methodName).aiText(sResponMessage).sReturnType(ReturnTypeCode.HTML.getCode()).build(); + return AiResponseDTO.builder().sSceneName(sceneName).sMethodName(methodName).aiText(sResponMessage).sReturnType(ReturnTypeCode.HTML.getCode()).dbType(session.getDbType()).dbCach(session.getDbCach()).build(); } else if (ObjectUtil.isNotEmpty(session.getCurrentTool())) { //2.处理工具参数采集结束后业务逻辑处理 //调用方法,参数缺失部分提示,就直接使用方法返回的 sResponMessage = dynamicToolProvider.doDynamicTool(session.getCurrentTool(),session); - return AiResponseDTO.builder().sSceneName(sceneName).sMethodName(methodName).aiText(sResponMessage).sReturnType(ReturnTypeCode.HTML.getCode()).build(); + return AiResponseDTO.builder().sSceneName(sceneName).sMethodName(methodName).aiText(sResponMessage).dbType(session.getDbType()).dbCach(session.getDbCach()).sReturnType(ReturnTypeCode.HTML.getCode()).build(); }else if(session.getCurrentScene()== null ){ - return AiResponseDTO.builder().sSceneName(sceneName).sMethodName(methodName).aiText("当前场景:没有选择 退回当前场景 请输入 "+ CommonConstant.RESET + sResponMessage).sReturnType(ReturnTypeCode.HTML.getCode()).build(); + return AiResponseDTO.builder().sSceneName(sceneName).sMethodName(methodName).aiText("当前场景:没有选择 退回当前场景 请输入 "+ CommonConstant.RESET + sResponMessage).dbType(session.getDbType()).dbCach(session.getDbCach()).sReturnType(ReturnTypeCode.HTML.getCode()).build(); }else{ return getChatiAgent (input, session); } } catch (Exception e) { e.printStackTrace(); - return AiResponseDTO.builder().sSceneName(sceneName).sMethodName(methodName).aiText("系统异常:" + e.getMessage() + ",请稍后重试!").sReturnType(ReturnTypeCode.HTML.getCode()).build(); + return AiResponseDTO.builder().sSceneName(sceneName).sMethodName(methodName).aiText("系统异常:" + e.getMessage() + ",请稍后重试!").dbType(session.getDbType()).dbCach(session.getDbCach()).sReturnType(ReturnTypeCode.HTML.getCode()).build(); }finally { //5.执行工具方法后,清除记忆 if(session !=null && session.getBCleanMemory()){ @@ -363,23 +397,56 @@ public class XlyErpService { * @return java.lang.String * @Description 查询向量库 **/ - private String getMilvus(UserSceneSession session,String userInput,ErpAiAgent aiAgent){ + private String getMilvus(UserSceneSession session,String userInput,ErpAiAgent aiAgent,Boolean bCach){ String resultExplain = "信息模糊,请提供更具体的问题或指令"; try{ + addSessionUserQuestionList(session, userInput); String sVectorfiled = session.getCurrentTool().getSVectorfiled(); String sInputTabelName = session.getCurrentTool().getSInputTabelName(); String sVectorfiledAll = session.getCurrentTool().getSVectorfiledAll(); - Map rMap = milvusService.getMilvusFiled(sVectorfiled,sVectorfiledAll); + String sVectorfiledShow = session.getCurrentTool().getSVectorfiledShow(); + Map rMap = milvusService.getMilvusFiled(sVectorfiled,sVectorfiledAll,sVectorfiledShow); String sMilvusFiled = rMap.get("sMilvusFiled").toString(); String sMilvusFiledDescription = rMap.get("sMilvusFiledDescription").toString(); String sMilvusFiledDescriptionAll = rMap.get("sMilvusFiledDescriptionAll").toString(); - List fields = (List) rMap.get("sFileds"); -// List> title = (List>) rMap.get("title"); - String milvusFilter = aiAgent.getMilvusFilter(session.getUserId(),userInput, sMilvusFiled, sMilvusFiledDescription); - List> data = milvusService.getDataToCollection(sInputTabelName, milvusFilter,userInput,100,fields); - //采用表格形式显示 - resultExplain = aiAgent.explainMilvusResult(session.getUserId(),userInput,sMilvusFiledDescriptionAll,JSONObject.toJSONString(data)); - //buildMarkdownTableWithStream(data, title); + List filedsShow = (List) rMap.get("filedsShow"); + List> title = (List>) rMap.get("title"); + String milvusFilter = StrUtil.EMPTY; + if(!bCach){ + milvusFilter = aiAgent.getMilvusFilter(session.getUserId(),userInput, sMilvusFiled, sMilvusFiledDescription,DateUtil.now()); + log.info("查询向量库条件{}",milvusFilter); + milvusFilter = milvusService.isValidMilvusFilter(milvusFilter)?milvusFilter : null; + log.info("实际查询向量库条件{}",milvusFilter); + } + Integer pageSize = 100; + if(ObjectUtil.isEmpty(milvusFilter)){ + pageSize = 10; + } +// 待条件全查 不带 10条 + List> data = milvusService.getDataToCollection(sInputTabelName, milvusFilter,userInput,pageSize,filedsShow); + //存储到历史问题库(带where条件了就不存)并且没有记录过缓存 + if(!bCach && ObjectUtil.isEmpty(milvusFilter)){ + //执行操作记录表 + try{ + List chatMessage = operableChatMemoryProvider.getCurrentChatMessages(session.getUserId()); + //插入向量库 + doAiUserAgentQuestion(session,userInput,milvusFilter,"MILVUS",chatMessage); + }catch (Exception e){ + log.error("插入向量库异常",e); + } + } + //采用表格形式显示明细、...详情、...记录、...列表、...清单 + if(ObjectUtil.isEmpty(milvusFilter) + || userInput.contains("明细") + || userInput.contains("详情") + || userInput.contains("记录") + || userInput.contains("列表") + || userInput.contains("清单") + ){ + resultExplain = buildMarkdownTableWithStream(data, title); + }else{ + resultExplain = aiAgent.explainMilvusResult(session.getUserId(),userInput,sMilvusFiledDescriptionAll,JSONObject.toJSONString(data)); + } return resultExplain; }catch (Exception e){ e.printStackTrace(); @@ -435,25 +502,16 @@ public class XlyErpService { * @return java.lang.String * @Description 获取执行动态SQL **/ - private String getDynamicTableSql(UserSceneSession session,String input,String userId,String userInput,Integer attempt,String errorSql,String errorMessage,String iErroCount,String historySqlList,ErpAiAgent aiAgent){ + private String getDynamicTableSql(UserSceneSession session,String input,String userId,String userInput,Integer attempt,String errorSql,String errorMessage,String iErroCount,String historySqlList,ErpAiAgent aiAgent,String cleanSql){ String resultExplain = "信息模糊,请提供更具体的问题或指令"; try{ while (attempt < maxRetries) { try{ attempt = attempt+1; if(attempt==1){ - List userQuestionList = session.getSUserQuestionList(); - if(ObjectUtil.isEmpty(userQuestionList)){ - userQuestionList = new ArrayList<>(); - } - String sQuestion = StrUtil.replace(userInput," ",StrUtil.EMPTY); - sQuestion = StrUtil.replace(sQuestion,"\t",StrUtil.EMPTY); - sQuestion = StrUtil.replace(sQuestion,"\n",StrUtil.EMPTY); - sQuestion = sQuestion.toLowerCase(); - userQuestionList.add(sQuestion); - session.setSUserQuestionList(userQuestionList); + addSessionUserQuestionList(session, userInput); } - return getDynamicTableSqlExec(session, input, userId, userInput,errorSql,errorMessage,iErroCount,historySqlList, aiAgent); + return getDynamicTableSqlExec(session, input, userId, userInput,errorSql,errorMessage,iErroCount,historySqlList, aiAgent, cleanSql); }catch (SqlValidateException e){ return "本场景没有识别到您的意图
如果切换场景,点[回首页],如果在本场景下,转换意图,点[清除记忆]"; }catch (Exception e){ @@ -472,7 +530,7 @@ public class XlyErpService { if (attempt == maxRetries) { return resultExplain +"
查询的SQL语句:"+historySqlList; } else { - return getDynamicTableSql( session, input, userId, userInput, attempt,errorSqlOld,errorMessageOld,attempt.toString(),historySqlList, aiAgent); + return getDynamicTableSql( session, input, userId, userInput, attempt,errorSqlOld,errorMessageOld,attempt.toString(),historySqlList, aiAgent,cleanSql); } } } @@ -484,6 +542,19 @@ public class XlyErpService { return resultExplain; } + private void addSessionUserQuestionList(UserSceneSession session,String userInput){ + List userQuestionList = session.getSUserQuestionList(); + if(ObjectUtil.isEmpty(userQuestionList)){ + userQuestionList = new ArrayList<>(); + } + String sQuestion = StrUtil.replace(userInput," ",StrUtil.EMPTY); + sQuestion = StrUtil.replace(sQuestion,"\t",StrUtil.EMPTY); + sQuestion = StrUtil.replace(sQuestion,"\n",StrUtil.EMPTY); + sQuestion = sQuestion.toLowerCase(); + userQuestionList.add(sQuestion); + session.setSUserQuestionList(userQuestionList); + } + /*** * @Author 钱豹 * @Date 19:59 2026/3/4 @@ -510,18 +581,15 @@ public class XlyErpService { * @return java.lang.String * @Description 执行动态sSql **/ - private String getDynamicTableSqlExec(UserSceneSession session,String input,String userId,String userInput,String errorSql,String errorMessage,String iErroCount,String historySqlList,ErpAiAgent aiAgent){ + private String getDynamicTableSqlExec(UserSceneSession session,String input,String userId,String userInput,String errorSql,String errorMessage,String iErroCount,String historySqlList,ErpAiAgent aiAgent,String cleanSql){ // 1. 构建自然语言转SQLAgent, List> sqlResult; - String cleanSql = StrUtil.EMPTY; String rawSql; String tableStruct = session.getCurrentTool().getSStructureMemo(); String sError_mes; Boolean doAddSql = false; List chatMessage = new ArrayList<>(); try{ - //获取缓存动态SQL -// cleanSql = getDynamicTableNl2Sql(session,input); //如果之前已查询直接返回 if(ObjectUtil.isEmpty(cleanSql)){ DynamicTableNl2SqlAiAgent aiDynamicTableNl2SqlAiAgent = createDynamicTableNl2SqlAiAgent(userId, input, session); @@ -575,7 +643,7 @@ public class XlyErpService { //插入常用操作 if(doAddSql){ //执行操作记录表 - doAiUserAgentQuestion(session,input,cleanSql,chatMessage); + doAiUserAgentQuestion(session,input,cleanSql,"MYSQL",chatMessage); } String sText = aiAgent.explainSqlResult( userId, @@ -594,18 +662,41 @@ public class XlyErpService { * @return java.lang.String * @Description 获取动态SQL(历史中查询) **/ + private Map getDynamicTableCach(UserSceneSession session,String input){ + try{ + String searchText = session.getCurrentScene().getSId()+"_"+session.getCurrentTool().getSId()+input; + //根据问题查询向量库 + Map serMap = aiGlobalAgentQuestionSqlEmitterService.queryAiGlobalAgentQuestionSqlEmitter(searchText, "ai_global_agent_question_sql"); + return serMap; + }catch (Exception e){ + log.error("取是否走缓存异常"); + } + return null; + } + + /*** + * @Author 钱豹 + * @Date 17:04 2026/3/19 + * @Param [session] + * @return java.lang.String + * @Description 获取动态SQL(历史中查询) + **/ private String getDynamicTableNl2Sql(UserSceneSession session,String input){ // String sReidKey = SqlValidateUtil.getsKey( session.getCurrentScene().getSId(), session.getCurrentTool().getSId(), input); // Object sSql = redisService.get(sReidKey); // if(ObjectUtil.isNotEmpty(sSql)){ // return sSql.toString(); // } - String searchText = session.getCurrentScene().getSId()+"_"+session.getCurrentTool().getSId()+input; - //SqlValidateUtil.getsKey( session.getCurrentScene().getSId(), session.getCurrentTool().getSId(), SqlValidateUtil.getsQuestion(session.getSUserQuestionList())); - //根据问题查询向量库 - Map serMap = aiGlobalAgentQuestionSqlEmitterService.queryAiGlobalAgentQuestionSqlEmitter(searchText, "ai_global_agent_question_sql"); - if(ObjectUtil.isNotEmpty(serMap)){ - return serMap.get("sSqlContent").toString(); + try{ + String searchText = session.getCurrentScene().getSId()+"_"+session.getCurrentTool().getSId()+input; + //SqlValidateUtil.getsKey( session.getCurrentScene().getSId(), session.getCurrentTool().getSId(), SqlValidateUtil.getsQuestion(session.getSUserQuestionList())); + //根据问题查询向量库 + Map serMap = aiGlobalAgentQuestionSqlEmitterService.queryAiGlobalAgentQuestionSqlEmitter(searchText, "ai_global_agent_question_sql"); + if(ObjectUtil.isNotEmpty(serMap)){ + return serMap.get("sSqlContent").toString(); + } + }catch (Exception e){ + } return null; } @@ -639,11 +730,12 @@ public class XlyErpService { public void doAiUserAgentQuestion(UserSceneSession session, String sQuestion, String sSqlContent, + String cachType, List chatMessage ){ MultiThreadPoolServer mts = MultiThreadPoolServer.getInstance(); - AiUserAgentQuestionThread at = new AiUserAgentQuestionThread(session,sQuestion,sSqlContent,chatMessage); + AiUserAgentQuestionThread at = new AiUserAgentQuestionThread(session,sQuestion,sSqlContent,cachType,chatMessage); mts.service(at); } diff --git a/src/main/java/com/xly/thread/AiUserAgentQuestionThread.java b/src/main/java/com/xly/thread/AiUserAgentQuestionThread.java index 1827efa..82fa20c 100644 --- a/src/main/java/com/xly/thread/AiUserAgentQuestionThread.java +++ b/src/main/java/com/xly/thread/AiUserAgentQuestionThread.java @@ -21,14 +21,16 @@ public class AiUserAgentQuestionThread implements Runnable { private UserSceneSession session; private String sSqlContent; private String sQuestion; + private String cachType; List userMessage; public final String sProName="Sp_Ai_AiUserAgentQuestionThread"; - public AiUserAgentQuestionThread(UserSceneSession session,String sQuestion,String sSqlContent, List userMessage ) { + public AiUserAgentQuestionThread(UserSceneSession session,String sQuestion,String sSqlContent,String cachType, List userMessage ) { this.session = session; this.sSqlContent = sSqlContent; this.sQuestion = sQuestion; this.userMessage = userMessage; + this.cachType = cachType; } @Override @@ -44,6 +46,7 @@ public class AiUserAgentQuestionThread implements Runnable { Map data = getMap(sSceneId, sMethodId,bRedis,sQuestionGroupNo); data.put("sQuestion",SqlValidateUtil.getsQuestion(session.getSUserQuestionList())); data.put("sId",new UUIDGenerator().next()); + data.put("cachType",cachType); //插入Redis缓存 if(bRedis==1 && ObjectUtil.isNotEmpty(sSqlContent)){ String sReidKey = SqlValidateUtil.getsKey( sSceneId, sMethodId, sQuestionGroupNo); @@ -52,7 +55,7 @@ public class AiUserAgentQuestionThread implements Runnable { String sKey = sSceneId+"_"+sMethodId +"_"+sQuestion; // SqlValidateUtil.getsKey( sSceneId, sMethodId, SqlValidateUtil.getsQuestion(session.getSUserQuestionList())); //存入向量库 - aiGlobalAgentQuestionSqlEmitterService.addAiGlobalAgentQuestionSqlEmitter(sKey,data,sQuestion,sSqlContent,"ai_global_agent_question_sql"); + aiGlobalAgentQuestionSqlEmitterService.addAiGlobalAgentQuestionSqlEmitter(sKey,data,sQuestion,sSqlContent,cachType,"ai_global_agent_question_sql"); //调用数据库插入数据库 Map searMap = dynamicExeDbService.getDoProMap(sProName, data); dynamicExeDbService.getCallPro(searMap, sProName); diff --git a/src/main/java/com/xly/tool/DynamicToolProvider.java b/src/main/java/com/xly/tool/DynamicToolProvider.java index 45dac58..691bf1d 100644 --- a/src/main/java/com/xly/tool/DynamicToolProvider.java +++ b/src/main/java/com/xly/tool/DynamicToolProvider.java @@ -903,17 +903,18 @@ public class DynamicToolProvider implements ToolProvider { Map returnMap = transformationArgs( args, paramDefs); return paramDefs.stream() .filter(pd -> Boolean.TRUE.equals(pd.getBEmpty()) && pd.getBTipModel()) - .filter(pd -> - (!returnMap.containsKey(pd.getSParam()) - || (ObjectUtil.isEmpty(returnMap.get(pd.getSParam()))) - || (pd.getSParamValue().startsWith("d") && 0==Double.valueOf (returnMap.get(pd.getSParam()).toString())) - ) - && (!returnMap.containsKey(pd.getSParamValue()) || (ObjectUtil.isEmpty(returnMap.get(pd.getSParamValue())))) - ) + .filter(pd -> checkMiss( returnMap, pd)) .map(ParamRule::getSParam) .toList(); } + private Boolean checkMiss(Map returnMap,ParamRule pd) { + Boolean bBhcs = (ObjectUtil.isEmpty(returnMap.get(pd.getSParam()))); + Boolean bDbZero = (pd.getSParamValue().startsWith("d") && 0 == Double.valueOf(returnMap.get(pd.getSParam()).toString())); + return bDbZero || bBhcs || (!returnMap.containsKey(pd.getSParamValue()) || (ObjectUtil.isEmpty(returnMap.get(pd.getSParamValue())))); + } + + /** * 确认后必填参数 */ diff --git a/src/main/java/com/xly/tts/bean/TTSResponseDTO.java b/src/main/java/com/xly/tts/bean/TTSResponseDTO.java index cf9200c..a32f2fb 100644 --- a/src/main/java/com/xly/tts/bean/TTSResponseDTO.java +++ b/src/main/java/com/xly/tts/bean/TTSResponseDTO.java @@ -72,6 +72,15 @@ public class TTSResponseDTO implements Serializable { private String sReturnType = ReturnTypeCode.MAKEDOWN.getCode(); private Boolean ErpComplete; + /** + * 数据库类型 X: 向量库 S:数据库 + */ + private String dbType; + + /** + * 数据库类型 H: 缓存 D: 动态 + */ + private String dbCach; /** * 创建失败响应 diff --git a/src/main/java/com/xly/tts/service/PythonTtsProxyService.java b/src/main/java/com/xly/tts/service/PythonTtsProxyService.java index d573917..4659254 100644 --- a/src/main/java/com/xly/tts/service/PythonTtsProxyService.java +++ b/src/main/java/com/xly/tts/service/PythonTtsProxyService.java @@ -278,6 +278,8 @@ public class PythonTtsProxyService { .sSceneName(aiResponseDTO.getSSceneName()) .sMethodName(aiResponseDTO.getSMethodName()) .sReturnType(aiResponseDTO.getSReturnType()) + .dbType(aiResponseDTO.getDbType()) + .dbCach(aiResponseDTO.getDbCach()) .sCommonts(BusinessCode.COMMONTS.getMessage()) .timestamp(System.currentTimeMillis()) .textLength((aiText + systemText).length()) diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index b237890..0132d77 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -127,11 +127,12 @@ langchain4j: # 聊天模型配置(用于一般对话) base-url: http://112.82.245.194:11434 chat-model-name: qwen2.5:7b-instruct +# chat-model-name: qwen3:14b # chat-model-name: qwen3.5:9b # SQL/代码模型配置(专门用于代码和SQL生成) sql-model-name: qwen2.5-coder:7b +# sql-model-name: SimonPu/qwen3-coder:30B-Instruct_Q4_K_XL # sql-model-name: qwen2.5-coder:32b -# sql-model-name: mdq100/qwen3.5-coder:35b # 或者如果两个模型在同一服务器,可以使用同一个URL # ollama: @@ -140,7 +141,9 @@ langchain4j: # chat-model-name: qwen3.5:9b # # SQL/代码模型配置(专门用于代码和SQL生成) # sql-model-name: mdq100/qwen3.5-coder:35b - +tesseract: + # 这里填写你存放 tessdata 的目录路径,注意不是 tessdata 文件夹本身 + datapath: D:/xlyweberp/Tesseract-OCR/tessdata mybatis: mapper-locations: classpath:mapper/*.xml type-aliases-package: com.xly.entity