Commit 8cb723cdcaa669d899bf92d53e822bfe937f8e3b
1 parent: d72de0c4
添加向量库
Showing 1 changed file with 226 additions and 41 deletions
src/main/java/com/xly/milvus/service/impl/MilvusServiceImpl.java
| @@ -337,30 +337,122 @@ public class MilvusServiceImpl implements MilvusService { | @@ -337,30 +337,122 @@ public class MilvusServiceImpl implements MilvusService { | ||
| 337 | } | 337 | } |
| 338 | 338 | ||
| 339 | @Override | 339 | @Override |
| 340 | - public List<Map<String, Object>> getDataToCollection(String collectionName, String milvusFilter,String searchText,Integer size,List<String> fields,String vectorValue,String sceneName){ | ||
| 341 | - log.info("开始相似度查询: collection={}, searchText={}", collectionName, searchText); | ||
| 342 | - // 2. 设置范围搜索参数 | ||
| 343 | - Map<String, Object> searchParams = new HashMap<>(); | ||
| 344 | - searchParams.put("nprobe", 10); | ||
| 345 | - // 对于 IP 度量,相似度范围在 [minScore, maxScore] | ||
| 346 | - searchParams.put("radius", 0.9); // 最小相似度 | ||
| 347 | - searchParams.put("range_filter", 1); // 最大相似度 | ||
| 348 | - if(ObjectUtil.isEmpty(fields)){ | 340 | + public List<Map<String, Object>> getDataToCollection(String collectionName, |
| 341 | + String milvusFilter, | ||
| 342 | + String searchText, | ||
| 343 | + Integer size, | ||
| 344 | + List<String> fields, | ||
| 345 | + String vectorValue, | ||
| 346 | + String sceneName) { | ||
| 347 | + log.info("开始查询: collection={}, searchText={}, vectorValue={}", collectionName, searchText, vectorValue); | ||
| 348 | + | ||
| 349 | + // 设置输出字段 | ||
| 350 | + if (ObjectUtil.isEmpty(fields)) { | ||
| 349 | fields = new ArrayList<>(); | 351 | fields = new ArrayList<>(); |
| 350 | } | 352 | } |
| 351 | fields.add("sSlaveId"); | 353 | fields.add("sSlaveId"); |
| 352 | fields.add("metadata"); | 354 | fields.add("metadata"); |
| 353 | - // 1. 构建查询(通用) | 355 | + |
| 356 | + // 根据 vectorValue 判断走哪个查询 | ||
| 357 | + if (ObjectUtil.isNotEmpty(vectorValue)) { | ||
| 358 | + // vectorValue 不为空:走向量相似度搜索(使用 SearchReq) | ||
| 359 | + log.info("使用向量相似度搜索,vectorValue 存在"); | ||
| 360 | + return similaritySearchWithVector(collectionName, milvusFilter, size, fields, vectorValue); | ||
| 361 | +// } else if (ObjectUtil.isNotEmpty(searchText)) { | ||
| 362 | +// // vectorValue 为空但 searchText 不为空:走文本向量化搜索(使用 SearchReq) | ||
| 363 | +// log.info("使用文本向量化搜索,searchText={}", searchText); | ||
| 364 | +// return similaritySearchWithText(collectionName, milvusFilter, searchText, size, fields); | ||
| 365 | + } else { | ||
| 366 | + // vectorValue 和 searchText 都为空:走全量数据查询(使用 QueryReq) | ||
| 367 | + log.info("使用全量数据查询,无搜索条件"); | ||
| 368 | + return queryAllData(collectionName, milvusFilter, fields,size); | ||
| 369 | + } | ||
| 370 | + } | ||
| 371 | + | ||
| 372 | + /** | ||
| 373 | + * 全量数据查询(使用 Query API) | ||
| 374 | + */ | ||
| 375 | + private List<Map<String, Object>> queryAllData(String collectionName, | ||
| 376 | + String milvusFilter, | ||
| 377 | + List<String> fields, | ||
| 378 | + Integer size) { | ||
| 379 | + // 使用 QueryReqBuilder(不需要向量数据) | ||
| 380 | + QueryReq.QueryReqBuilder builder = QueryReq.builder() | ||
| 381 | + .collectionName(collectionName) | ||
| 382 | + .outputFields(fields); | ||
| 383 | + if (ObjectUtil.isNotEmpty(milvusFilter)) { | ||
| 384 | + builder.filter(milvusFilter); | ||
| 385 | + } | ||
| 386 | + // 可选:如果 size 有值,可以设置 limit | ||
| 387 | + if (ObjectUtil.isNotEmpty(size)) { | ||
| 388 | + builder.limit(size); | ||
| 389 | + } | ||
| 390 | + QueryReq queryReq = builder.build(); | ||
| 391 | + QueryResp queryResp = milvusClient.query(queryReq); | ||
| 392 | + return processMilvusResults(queryResp); | ||
| 393 | + } | ||
| 394 | + | ||
| 395 | + /** | ||
| 396 | + * 使用文本向量化进行相似度搜索 | ||
| 397 | + */ | ||
| 398 | + private List<Map<String, Object>> similaritySearchWithText(String collectionName, | ||
| 399 | + String milvusFilter, | ||
| 400 | + String searchText, | ||
| 401 | + Integer size, | ||
| 402 | + List<String> fields) { | ||
| 403 | + // 向量化搜索文本 | ||
| 404 | + List<Float> vectorList = vectorizationService.textToVector(searchText); | ||
| 405 | + if (vectorList == null || vectorList.isEmpty()) { | ||
| 406 | + throw new RuntimeException("向量化失败"); | ||
| 407 | + } | ||
| 408 | + | ||
| 409 | + // 转换为 float[] | ||
| 410 | + float[] floatArray = new float[vectorList.size()]; | ||
| 411 | + for (int i = 0; i < vectorList.size(); i++) { | ||
| 412 | + floatArray[i] = vectorList.get(i); | ||
| 413 | + } | ||
| 414 | + | ||
| 415 | + // 设置搜索参数 | ||
| 416 | + Map<String, Object> searchParams = new HashMap<>(); | ||
| 417 | + searchParams.put("nprobe", 10); | ||
| 418 | + searchParams.put("radius", 0.9); | ||
| 419 | + searchParams.put("range_filter", 1); | ||
| 420 | + | ||
| 421 | + // 构建搜索请求 | ||
| 422 | + FloatVec floatVec = new FloatVec(floatArray); | ||
| 354 | SearchReq.SearchReqBuilder builder = SearchReq.builder() | 423 | SearchReq.SearchReqBuilder builder = SearchReq.builder() |
| 355 | .collectionName(collectionName) | 424 | .collectionName(collectionName) |
| 356 | .topK(size) | 425 | .topK(size) |
| 357 | .metricType(IndexParam.MetricType.IP) | 426 | .metricType(IndexParam.MetricType.IP) |
| 358 | .outputFields(fields) | 427 | .outputFields(fields) |
| 359 | -// .filterType(FilterType.POST_FILTER) | ||
| 360 | - .searchParams(searchParams); | ||
| 361 | - if(ObjectUtil.isEmpty(vectorValue)){ | ||
| 362 | - vectorValue = sceneName; | 428 | + .searchParams(searchParams) |
| 429 | + .data(Collections.singletonList(floatVec)) | ||
| 430 | + .annsField("vector"); | ||
| 431 | + | ||
| 432 | + if (ObjectUtil.isNotEmpty(milvusFilter)) { | ||
| 433 | + builder.filter(milvusFilter); | ||
| 363 | } | 434 | } |
| 435 | + | ||
| 436 | + SearchReq searchReq = builder.build(); | ||
| 437 | + SearchResp searchResp = milvusClient.search(searchReq); | ||
| 438 | + | ||
| 439 | + return processMilvusResults(searchResp); | ||
| 440 | + } | ||
| 441 | + | ||
| 442 | + /** | ||
| 443 | + * 使用已有向量进行相似度搜索 | ||
| 444 | + */ | ||
| 445 | + private List<Map<String, Object>> similaritySearchWithVector(String collectionName, | ||
| 446 | + String milvusFilter, | ||
| 447 | + Integer size, | ||
| 448 | + List<String> fields, | ||
| 449 | + String vectorValue) { | ||
| 450 | + // 设置搜索参数 | ||
| 451 | + Map<String, Object> searchParams = new HashMap<>(); | ||
| 452 | + searchParams.put("nprobe", 10); | ||
| 453 | + searchParams.put("radius", 0.9); | ||
| 454 | + searchParams.put("range_filter", 1); | ||
| 455 | + // 解析向量字符串为 float 数组 | ||
| 364 | // 1. 向量化搜索文本 | 456 | // 1. 向量化搜索文本 |
| 365 | List<Float> vectorList = vectorizationService.textToVector(vectorValue); | 457 | List<Float> vectorList = vectorizationService.textToVector(vectorValue); |
| 366 | if (vectorList == null || vectorList.isEmpty()) { | 458 | if (vectorList == null || vectorList.isEmpty()) { |
| @@ -373,31 +465,72 @@ public class MilvusServiceImpl implements MilvusService { | @@ -373,31 +465,72 @@ public class MilvusServiceImpl implements MilvusService { | ||
| 373 | } | 465 | } |
| 374 | // 3. 创建 Milvus FloatVec 对象 | 466 | // 3. 创建 Milvus FloatVec 对象 |
| 375 | FloatVec floatVec = new FloatVec(floatArray); | 467 | FloatVec floatVec = new FloatVec(floatArray); |
| 376 | - builder.data(Collections.singletonList(floatVec)) | ||
| 377 | - .annsField("vector"); // 向量字段名 | 468 | + SearchReq.SearchReqBuilder builder = SearchReq.builder() |
| 469 | + .collectionName(collectionName) | ||
| 470 | + .topK(size) | ||
| 471 | + .metricType(IndexParam.MetricType.IP) | ||
| 472 | + .outputFields(fields) | ||
| 473 | + .searchParams(searchParams) | ||
| 474 | + .data(Collections.singletonList(floatVec)) | ||
| 475 | + .annsField("vector"); | ||
| 378 | 476 | ||
| 379 | - if(ObjectUtil.isNotEmpty(milvusFilter)){ | 477 | + if (ObjectUtil.isNotEmpty(milvusFilter)) { |
| 380 | builder.filter(milvusFilter); | 478 | builder.filter(milvusFilter); |
| 381 | } | 479 | } |
| 382 | - // 4. 构建搜索请求 | ||
| 383 | -// SearchReq searchReq = SearchReq.builder() | ||
| 384 | -// .collectionName(collectionName) | ||
| 385 | -// .data(Collections.singletonList(floatVec)) | ||
| 386 | -// .annsField("vector") // 向量字段名 | ||
| 387 | -// .topK(size) // 返回最相似的10条 | ||
| 388 | -// .metricType(IndexParam.MetricType.IP) // 内积相似度 | ||
| 389 | -// .outputFields(fields) | ||
| 390 | -// .searchParams(searchParams) | ||
| 391 | -// .filter(milvusFilter) | ||
| 392 | -// .build(); | ||
| 393 | - // 5. 执行搜索 | 480 | + |
| 394 | SearchReq searchReq = builder.build(); | 481 | SearchReq searchReq = builder.build(); |
| 395 | SearchResp searchResp = milvusClient.search(searchReq); | 482 | SearchResp searchResp = milvusClient.search(searchReq); |
| 396 | - | ||
| 397 | - // 6. 处理结果 | ||
| 398 | return processMilvusResults(searchResp); | 483 | return processMilvusResults(searchResp); |
| 399 | } | 484 | } |
| 400 | 485 | ||
| 486 | +// public List<Map<String, Object>> getDataToCollection(String collectionName, String milvusFilter,String searchText,Integer size,List<String> fields,String vectorValue,String sceneName){ | ||
| 487 | +// log.info("开始相似度查询: collection={}, searchText={}", collectionName, searchText); | ||
| 488 | +// // 2. 设置范围搜索参数 | ||
| 489 | +// Map<String, Object> searchParams = new HashMap<>(); | ||
| 490 | +// searchParams.put("nprobe", 10); | ||
| 491 | +// // 对于 IP 度量,相似度范围在 [minScore, maxScore] | ||
| 492 | +// searchParams.put("radius", 0.9); // 最小相似度 | ||
| 493 | +// searchParams.put("range_filter", 1); // 最大相似度 | ||
| 494 | +// if(ObjectUtil.isEmpty(fields)){ | ||
| 495 | +// fields = new ArrayList<>(); | ||
| 496 | +// } | ||
| 497 | +// fields.add("sSlaveId"); | ||
| 498 | +// fields.add("metadata"); | ||
| 499 | +// // 1. 构建查询(通用) | ||
| 500 | +// SearchReq.SearchReqBuilder builder = SearchReq.builder() | ||
| 501 | +// .collectionName(collectionName) | ||
| 502 | +// .topK(size) | ||
| 503 | +// .metricType(IndexParam.MetricType.IP) | ||
| 504 | +// .outputFields(fields) | ||
| 505 | +//// .filterType(FilterType.POST_FILTER) | ||
| 506 | +// .searchParams(searchParams); | ||
| 507 | +// if(ObjectUtil.isNotEmpty(vectorValue)){ | ||
| 508 | +// // 1. 向量化搜索文本 | ||
| 509 | +// List<Float> vectorList = vectorizationService.textToVector(vectorValue); | ||
| 510 | +// if (vectorList == null || vectorList.isEmpty()) { | ||
| 511 | +// throw new RuntimeException("向量化失败"); | ||
| 512 | +// } | ||
| 513 | +// // 2. 转换为 float[] | ||
| 514 | +// float[] floatArray = new float[vectorList.size()]; | ||
| 515 | +// for (int i = 0; i < vectorList.size(); i++) { | ||
| 516 | +// floatArray[i] = vectorList.get(i); | ||
| 517 | +// } | ||
| 518 | +// // 3. 创建 Milvus FloatVec 对象 | ||
| 519 | +// FloatVec floatVec = new FloatVec(floatArray); | ||
| 520 | +// builder.data(Collections.singletonList(floatVec)) | ||
| 521 | +// .annsField("vector"); // 向量字段名 | ||
| 522 | +// } | ||
| 523 | +// if(ObjectUtil.isNotEmpty(milvusFilter)){ | ||
| 524 | +// builder.filter(milvusFilter); | ||
| 525 | +// } | ||
| 526 | +// // 5. 执行搜索 | ||
| 527 | +// SearchReq searchReq = builder.build(); | ||
| 528 | +// SearchResp searchResp = milvusClient.search(searchReq); | ||
| 529 | +// | ||
| 530 | +// // 6. 处理结果 | ||
| 531 | +// return processMilvusResults(searchResp); | ||
| 532 | +// } | ||
| 533 | + | ||
| 401 | 534 | ||
| 402 | /** | 535 | /** |
| 403 | * 判断 Milvus 过滤条件是否有效(支持 TEXT_MATCH 全文检索) | 536 | * 判断 Milvus 过滤条件是否有效(支持 TEXT_MATCH 全文检索) |
| @@ -559,7 +692,7 @@ public class MilvusServiceImpl implements MilvusService { | @@ -559,7 +692,7 @@ public class MilvusServiceImpl implements MilvusService { | ||
| 559 | } | 692 | } |
| 560 | 693 | ||
| 561 | /** | 694 | /** |
| 562 | - * 处理 Milvus 查询结果 | 695 | + * 处理 SearchResp 查询结果(向量相似度搜索) |
| 563 | */ | 696 | */ |
| 564 | private List<Map<String, Object>> processMilvusResults(SearchResp response) { | 697 | private List<Map<String, Object>> processMilvusResults(SearchResp response) { |
| 565 | List<Map<String, Object>> results = new ArrayList<>(); | 698 | List<Map<String, Object>> results = new ArrayList<>(); |
| @@ -572,32 +705,84 @@ public class MilvusServiceImpl implements MilvusService { | @@ -572,32 +705,84 @@ public class MilvusServiceImpl implements MilvusService { | ||
| 572 | log.warn("Milvus 搜索结果为空"); | 705 | log.warn("Milvus 搜索结果为空"); |
| 573 | return results; | 706 | return results; |
| 574 | } | 707 | } |
| 708 | + | ||
| 575 | // 遍历每个查询的结果集(通常只有一个查询) | 709 | // 遍历每个查询的结果集(通常只有一个查询) |
| 576 | for (List<SearchResp.SearchResult> resultList : searchResults) { | 710 | for (List<SearchResp.SearchResult> resultList : searchResults) { |
| 577 | // 遍历每个搜索结果 | 711 | // 遍历每个搜索结果 |
| 578 | for (SearchResp.SearchResult result : resultList) { | 712 | for (SearchResp.SearchResult result : resultList) { |
| 713 | + Map<String, Object> resultMap = new HashMap<>(); | ||
| 714 | + | ||
| 579 | // 获取实体字段数据 | 715 | // 获取实体字段数据 |
| 580 | Map<String, Object> entity = result.getEntity(); | 716 | Map<String, Object> entity = result.getEntity(); |
| 581 | - Map<String,Object> metadata = new HashMap<>(); | ||
| 582 | - if(ObjectUtil.isNotEmpty(entity.get("metadata"))){ | ||
| 583 | - JsonObject obj = (JsonObject) entity.get("metadata"); | ||
| 584 | - metadata.putAll( jsonObjectToMap(obj)); | 717 | + if (entity != null && !entity.isEmpty()) { |
| 718 | + resultMap.putAll(entity); | ||
| 719 | + } | ||
| 720 | + | ||
| 721 | + // 处理 metadata 字段(如果是 JSON 对象) | ||
| 722 | + if (resultMap.containsKey("metadata") && resultMap.get("metadata") instanceof JsonObject) { | ||
| 723 | + JsonObject jsonObject = (JsonObject) resultMap.get("metadata"); | ||
| 724 | + Map<String, Object> metadataMap = jsonObjectToMap(jsonObject); | ||
| 725 | + // 可以选择合并或替换 | ||
| 726 | + resultMap.putAll(metadataMap); | ||
| 727 | + resultMap.remove("metadata"); // 移除原始的 metadata 对象 | ||
| 585 | } | 728 | } |
| 729 | + | ||
| 586 | // 获取相似度分数 | 730 | // 获取相似度分数 |
| 587 | Float score = result.getScore(); | 731 | Float score = result.getScore(); |
| 588 | if (score != null) { | 732 | if (score != null) { |
| 589 | - metadata.put("score", score); | 733 | + resultMap.put("score", score); |
| 590 | } | 734 | } |
| 591 | - // 将所有字段添加到结果中 | ||
| 592 | -// item.putAll(entity); | ||
| 593 | - results.add(metadata); | 735 | + |
| 736 | + results.add(resultMap); | ||
| 594 | } | 737 | } |
| 595 | } | 738 | } |
| 739 | + | ||
| 596 | log.info("处理完成,共 {} 条搜索结果", results.size()); | 740 | log.info("处理完成,共 {} 条搜索结果", results.size()); |
| 597 | return results; | 741 | return results; |
| 598 | } | 742 | } |
| 599 | 743 | ||
| 600 | /** | 744 | /** |
| 745 | + * 处理 QueryResp 查询结果(全量查询) | ||
| 746 | + */ | ||
| 747 | + private List<Map<String, Object>> processMilvusResults(QueryResp response) { | ||
| 748 | + List<Map<String, Object>> results = new ArrayList<>(); | ||
| 749 | + if (response == null) { | ||
| 750 | + log.warn("Milvus 响应为空"); | ||
| 751 | + return results; | ||
| 752 | + } | ||
| 753 | + | ||
| 754 | + List<QueryResp.QueryResult> queryResults = response.getQueryResults(); | ||
| 755 | + if (queryResults == null || queryResults.isEmpty()) { | ||
| 756 | + log.warn("Milvus 查询结果为空"); | ||
| 757 | + return results; | ||
| 758 | + } | ||
| 759 | + | ||
| 760 | + // 遍历每个查询结果 | ||
| 761 | + for (QueryResp.QueryResult queryResult : queryResults) { | ||
| 762 | + Map<String, Object> resultMap = new HashMap<>(); | ||
| 763 | + | ||
| 764 | + // 获取实体字段数据 | ||
| 765 | + Map<String, Object> entity = queryResult.getEntity(); | ||
| 766 | + if (entity != null && !entity.isEmpty()) { | ||
| 767 | + resultMap.putAll(entity); | ||
| 768 | + } | ||
| 769 | + | ||
| 770 | + // 处理 metadata 字段(如果是 JSON 对象) | ||
| 771 | + if (resultMap.containsKey("metadata") && resultMap.get("metadata") instanceof JsonObject) { | ||
| 772 | + JsonObject jsonObject = (JsonObject) resultMap.get("metadata"); | ||
| 773 | + Map<String, Object> metadataMap = jsonObjectToMap(jsonObject); | ||
| 774 | + resultMap.putAll(metadataMap); | ||
| 775 | + resultMap.remove("metadata"); | ||
| 776 | + } | ||
| 777 | + | ||
| 778 | + results.add(resultMap); | ||
| 779 | + } | ||
| 780 | + | ||
| 781 | + log.info("处理完成,共 {} 条查询结果", results.size()); | ||
| 782 | + return results; | ||
| 783 | + } | ||
| 784 | + | ||
| 785 | + /** | ||
| 601 | * JsonObject 转 Map<String, Object> | 786 | * JsonObject 转 Map<String, Object> |
| 602 | */ | 787 | */ |
| 603 | public static Map<String, Object> jsonObjectToMap(JsonObject jsonObject) { | 788 | public static Map<String, Object> jsonObjectToMap(JsonObject jsonObject) { |
| @@ -693,7 +878,7 @@ public class MilvusServiceImpl implements MilvusService { | @@ -693,7 +878,7 @@ public class MilvusServiceImpl implements MilvusService { | ||
| 693 | } | 878 | } |
| 694 | vectorText.append(" | ").append(fieldArr[0]).append(":").append(sText); | 879 | vectorText.append(" | ").append(fieldArr[0]).append(":").append(sText); |
| 695 | } | 880 | } |
| 696 | - vectorText.append(" ").append(sceneName); | 881 | +// vectorText.append(" ").append(sceneName).append("数据"); |
| 697 | } | 882 | } |
| 698 | 883 | ||
| 699 | 884 |