Commit 7e0691c7d5940a1edb8f3fd291e17b6f5d3ca0e9

Authored by yanghl
1 parent 3ea4dac5

语音流式方式处理,按顺序播放。

src/main/java/com/xly/tts/bean/TTSResponseDTO.java
@@ -54,6 +54,7 @@ public class TTSResponseDTO implements Serializable { @@ -54,6 +54,7 @@ public class TTSResponseDTO implements Serializable {
54 private String audioBase64; 54 private String audioBase64;
55 private Integer audioSize; 55 private Integer audioSize;
56 private String audioFormat; 56 private String audioFormat;
  57 + private String audioText;
57 58
58 // 或者只返回音频URL 59 // 或者只返回音频URL
59 private String audioUrl; 60 private String audioUrl;
src/main/java/com/xly/tts/service/LocalAudioCache.java
1 package com.xly.tts.service; 1 package com.xly.tts.service;
2 2
3 import com.xly.tts.bean.TTSResponseDTO; 3 import com.xly.tts.bean.TTSResponseDTO;
4 -import java.util.Map; 4 +import java.util.*;
5 import java.util.concurrent.ConcurrentHashMap; 5 import java.util.concurrent.ConcurrentHashMap;
6 6
/**
 * Process-local, in-memory store for synthesized speech pieces.
 *
 * <p>A reply is synthesized as several numbered pieces. Each piece is stored under the key
 * {@code cacheKey + "_" + index} as an immutable map with the entries {@code "text"} (the text
 * of the piece) and {@code "audio"} (the Base64 encoded audio) and is fetched by its index.
 *
 * <p>Entries live in a {@link ConcurrentHashMap}. A piece expires five minutes after it was
 * stored; expired pieces are dropped lazily on the next write or lookup, so the cache cannot
 * grow without bound and no cleanup thread is needed.
 */
public class LocalAudioCache {

    /** Lifetime of a stored piece: 5 minutes, expressed in nanoseconds. */
    private static final long TTL_NANOS = 5L * 60L * 1_000_000_000L;

    /** Internal layout: {@code cacheKey_index -> piece + time it was stored}. */
    private static final Map<String, CachedPiece> CACHE = new ConcurrentHashMap<>();

    /**
     * Stores one piece (text plus audio) of a reply.
     *
     * @param cacheKey    identifier of the reply the piece belongs to
     * @param index       zero-based position of the piece within the reply
     * @param text        text of the piece; {@code null} is stored as an empty string
     * @param audioBase64 Base64 encoded audio of the piece, must not be {@code null}
     * @throws NullPointerException if {@code audioBase64} is {@code null}
     */
    public static void addPiece(String cacheKey, int index, String text, String audioBase64) {
        if (audioBase64 == null) {
            throw new NullPointerException("audioBase64 must not be null");
        }
        long now = System.nanoTime();
        // Every write sweeps out expired pieces so abandoned replies do not pile up.
        CACHE.values().removeIf(piece -> isExpired(piece, now));

        // Map.of rejects null values, so a missing text is normalized to "".
        String safeText = (text == null) ? "" : text;
        CACHE.put(keyOf(cacheKey, index),
                new CachedPiece(Map.of("text", safeText, "audio", audioBase64), now));
    }

    /**
     * Looks up one piece of a reply.
     *
     * @param cacheKey identifier of the reply
     * @param index    zero-based position of the piece within the reply
     * @return an immutable map with the keys {@code "text"} and {@code "audio"}, or {@code null}
     *         when the piece has not been stored or has expired
     */
    public static Map<String, String> getPiece(String cacheKey, int index) {
        String key = keyOf(cacheKey, index);
        CachedPiece piece = CACHE.get(key);
        if (piece == null) {
            return null;
        }
        if (isExpired(piece, System.nanoTime())) {
            // Two-argument remove: only drop the entry if it was not replaced meanwhile.
            CACHE.remove(key, piece);
            return null;
        }
        return piece.payload;
    }

    /** Builds the internal map key for a piece. */
    private static String keyOf(String cacheKey, int index) {
        return cacheKey + "_" + index;
    }

    /** Tells whether {@code piece} has outlived its lifetime at time {@code nowNanos}. */
    private static boolean isExpired(CachedPiece piece, long nowNanos) {
        // Subtraction keeps the comparison valid even if nanoTime wraps around.
        return nowNanos - piece.storedAtNanos >= TTL_NANOS;
    }

    /** Immutable holder of a piece and the {@link System#nanoTime()} at which it was stored. */
    private static final class CachedPiece {
        private final Map<String, String> payload;
        private final long storedAtNanos;

        private CachedPiece(Map<String, String> payload, long storedAtNanos) {
            this.payload = payload;
            this.storedAtNanos = storedAtNanos;
        }
    }
}
25 \ No newline at end of file 23 \ No newline at end of file
src/main/java/com/xly/tts/service/PythonTtsProxyService.java
@@ -18,15 +18,13 @@ import org.springframework.stereotype.Service; @@ -18,15 +18,13 @@ import org.springframework.stereotype.Service;
18 import org.springframework.web.client.RestTemplate; 18 import org.springframework.web.client.RestTemplate;
19 19
20 import javax.annotation.PostConstruct; 20 import javax.annotation.PostConstruct;
21 -import java.io.ByteArrayInputStream;  
22 -import java.io.InputStream; 21 +import java.io.*;
23 import java.util.*; 22 import java.util.*;
24 import java.util.concurrent.CompletableFuture; 23 import java.util.concurrent.CompletableFuture;
25 import java.util.concurrent.ExecutorService; 24 import java.util.concurrent.ExecutorService;
26 import java.util.concurrent.Executors; 25 import java.util.concurrent.Executors;
27 import java.net.URL; 26 import java.net.URL;
28 import java.net.HttpURLConnection; 27 import java.net.HttpURLConnection;
29 -import java.io.OutputStream;  
30 import java.io.InputStream; 28 import java.io.InputStream;
31 29
32 @Slf4j 30 @Slf4j
@@ -135,11 +133,18 @@ public class PythonTtsProxyService { @@ -135,11 +133,18 @@ public class PythonTtsProxyService {
135 } 133 }
136 String voiceTextNew = AdvancedSymbolRemover.removePunctuationHtml(aiText); 134 String voiceTextNew = AdvancedSymbolRemover.removePunctuationHtml(aiText);
137 135
  136 + // ============================
  137 + // 【绝对唯一】不会重复、不会覆盖
  138 + // ============================
  139 + String cacheKey = request.getUserid() + "_" + System.nanoTime();
  140 +
138 TTSResponseDTO dto = TTSResponseDTO.builder() 141 TTSResponseDTO dto = TTSResponseDTO.builder()
139 .code(200) 142 .code(200)
140 .message("success") 143 .message("success")
  144 + .cacheKey(cacheKey) // 前端靠这个取自己的分段
141 .originalText(request.getText()) 145 .originalText(request.getText())
142 .processedText(aiText) 146 .processedText(aiText)
  147 + .audioText(voiceTextNew)
143 .systemText(systemText) 148 .systemText(systemText)
144 .voice(request.getVoice()) 149 .voice(request.getVoice())
145 .sSceneName(aiResponseDTO.getSSceneName()) 150 .sSceneName(aiResponseDTO.getSSceneName())
@@ -155,52 +160,101 @@ public class PythonTtsProxyService { @@ -155,52 +160,101 @@ public class PythonTtsProxyService {
155 return ResponseEntity.ok(dto); 160 return ResponseEntity.ok(dto);
156 } 161 }
157 162
158 - // ==============================================  
159 - // 👇 【关键】生成 全局唯一的 key(多用户不冲突)  
160 - // ==============================================  
161 - String cacheKey = request.getUserid() + "_" + System.currentTimeMillis() + "_" + request.getText();  
162 - 163 + // 平均分割文字
  164 + List<String> textParts = splitTextSmart(voiceTextNew, 30);
  165 + dto.setAudioSize(textParts.size());
  166 + // 异步分段合成
163 CompletableFuture.runAsync(() -> { 167 CompletableFuture.runAsync(() -> {
164 - try {  
165 - Map<String, Object> params = new HashMap<>();  
166 - params.put("text", voiceTextNew);  
167 - params.put("voice", request.getVoice());  
168 - params.put("rate", request.getRate() != null ? request.getRate() : "+10%");  
169 - params.put("volume", request.getVolume() != null ? request.getVolume() : "+0%");  
170 -  
171 - HttpHeaders headers = new HttpHeaders();  
172 - headers.setContentType(MediaType.APPLICATION_JSON);  
173 - headers.setAccept(Collections.singletonList(MediaType.APPLICATION_OCTET_STREAM));  
174 - HttpEntity<Map<String, Object>> entity = new HttpEntity<>(params, headers);  
175 -  
176 - ResponseEntity<byte[]> response = restTemplate.exchange(  
177 - pythonServiceUrl + "/stream-synthesize",  
178 - HttpMethod.POST, entity, byte[].class  
179 - );  
180 -  
181 - if (response.getStatusCode().is2xxSuccessful() && response.getBody() != null) {  
182 - dto.setAudioBase64(Base64.getEncoder().encodeToString(response.getBody()));  
183 - dto.setAudioSize(response.getBody().length);  
184 - dto.setAudioFormat("audio/mpeg");  
185 -  
186 - // ==============================================  
187 - // 👇 用唯一key存(不覆盖别人)  
188 - // ==============================================  
189 - LocalAudioCache.put(cacheKey, dto); 168 + for (int i = 0; i < textParts.size(); i++) {
  169 + String part = textParts.get(i);
  170 + if (ObjectUtil.isEmpty(part)) continue;
  171 +
  172 + try {
  173 + Map<String, Object> params = new HashMap<>();
  174 + params.put("text", part);
  175 + params.put("voice", request.getVoice());
  176 + params.put("rate", request.getRate() != null ? request.getRate() : "+10%");
  177 + params.put("volume", request.getVolume() != null ? request.getVolume() : "+0%");
  178 +
  179 + HttpHeaders headers = new HttpHeaders();
  180 + headers.setContentType(MediaType.APPLICATION_JSON);
  181 + headers.setAccept(Collections.singletonList(MediaType.APPLICATION_OCTET_STREAM));
  182 + HttpEntity<Map<String, Object>> entity = new HttpEntity<>(params, headers);
  183 +
  184 + ResponseEntity<byte[]> response = restTemplate.exchange(
  185 + pythonServiceUrl + "/stream-synthesize",
  186 + HttpMethod.POST, entity, byte[].class
  187 + );
  188 +
  189 + if (response.getStatusCode().is2xxSuccessful() && response.getBody() != null) {
  190 + String base64 = Base64.getEncoder().encodeToString(response.getBody());
  191 +
  192 + // ============================
  193 + // 【关键】带序号存储!前端靠序号知道顺序!
  194 + // ============================
  195 + LocalAudioCache.addPiece(cacheKey, i, part, base64);
  196 + }
  197 + } catch (Exception e) {
  198 + log.warn("分段合成失败: {}", e.getMessage());
190 } 199 }
191 - } catch (Exception e) {  
192 - log.warn("语音合成忽略:{}", e.getMessage());  
193 } 200 }
194 }, executorService); 201 }, executorService);
195 202
196 - // ==============================================  
197 - // 👇 把 cacheKey 返回给前端(前端靠它取音频)  
198 - // ==============================================  
199 - dto.setCacheKey(cacheKey);  
200 -  
201 return ResponseEntity.ok(dto); 203 return ResponseEntity.ok(dto);
202 } 204 }
203 205
  206 + // ==============================================
  207 +// 智能分段:优先按 。!?; , 空格 断开
  208 +// 不会把一句话生硬切断,更自然
  209 +// ==============================================
  210 + private List<String> splitTextSmart(String text, int maxLength) {
  211 + List<String> parts = new ArrayList<>();
  212 + if (text == null || text.isEmpty()) return parts;
  213 +
  214 + int len = text.length();
  215 + int start = 0;
  216 +
  217 + while (start < len) {
  218 + int end = Math.min(start + maxLength, len);
  219 +
  220 + // 如果不是最后一段,寻找最近的断句点
  221 + if (end < len) {
  222 + // 优先按 。!?; 断句
  223 + int splitPos = lastIndexOfAny(text, start, end, '。', '!', '?', ';');
  224 + if (splitPos == -1) {
  225 + // 其次按 , 逗号
  226 + splitPos = lastIndexOfAny(text, start, end, ',');
  227 + }
  228 + if (splitPos == -1) {
  229 + // 最后按空格
  230 + splitPos = lastIndexOfAny(text, start, end, ' ');
  231 + }
  232 + if (splitPos != -1) {
  233 + end = splitPos + 1;
  234 + }
  235 + }
  236 +
  237 + String part = text.substring(start, end).trim();
  238 + if (!part.isEmpty()) {
  239 + parts.add(part);
  240 + }
  241 + start = end;
  242 + }
  243 + return parts;
  244 + }
  245 +
  246 + // 工具:查找最后出现的符号
  247 + private int lastIndexOfAny(String text, int start, int end, char... chars) {
  248 + for (int i = end - 1; i >= start; i--) {
  249 + for (char c : chars) {
  250 + if (text.charAt(i) == c) {
  251 + return i;
  252 + }
  253 + }
  254 + }
  255 + return -1;
  256 + }
  257 +
204 public ResponseEntity<InputStreamResource> getVoiceResult(TTSRequestDTO request) { 258 public ResponseEntity<InputStreamResource> getVoiceResult(TTSRequestDTO request) {
205 try { 259 try {
206 String voiceText = AdvancedSymbolRemover.removePunctuationHtml(request.getText()); 260 String voiceText = AdvancedSymbolRemover.removePunctuationHtml(request.getText());
src/main/java/com/xly/util/AdvancedSymbolRemover.java
@@ -15,26 +15,36 @@ public class AdvancedSymbolRemover { @@ -15,26 +15,36 @@ public class AdvancedSymbolRemover {
15 15
16 16
17 /** 17 /**
18 - * 移除所有标点符号(保留字母、数字、中文 18 + * 移除所有符号(保留字母、数字、中文、标点
19 */ 19 */
20 public static String removePunctuationHtml(String text) { 20 public static String removePunctuationHtml(String text) {
21 try{ 21 try{
22 if (text == null || text.isEmpty()) return ""; 22 if (text == null || text.isEmpty()) return "";
23 text = HtmlCleaner.cleanHtml(text); 23 text = HtmlCleaner.cleanHtml(text);
24 24
25 -  
26 text = text.replaceAll("br", ""); 25 text = text.replaceAll("br", "");
27 text = text.replaceAll("<br/>", ""); 26 text = text.replaceAll("<br/>", "");
28 text = text.replaceAll("</div>", ""); 27 text = text.replaceAll("</div>", "");
29 text = text.replaceAll("<div>", ""); 28 text = text.replaceAll("<div>", "");
30 text = text.replaceAll("&emsp;", ""); 29 text = text.replaceAll("&emsp;", "");
31 - // 👇 【安全正则】只删除 数字后面的 .0 或 .00 30 +
  31 + // 去掉数字末尾无用的 .0 .00
32 text = text.replaceAll("(?<=\\d)\\.0+(?!\\d)", ""); 32 text = text.replaceAll("(?<=\\d)\\.0+(?!\\d)", "");
33 - // 移除中文和英文标点  
34 - text = text.replaceAll("[\\pP\\p{Punct}]", "");  
35 33
36 - // 可选:只保留字母、数字、汉字、空格  
37 - text = text.replaceAll("[^\\p{L}\\p{N}\\p{Zs}]", ""); 34 + // 去掉无用文字
  35 + text = text.replaceAll("换一换", "");
  36 +
  37 + // 去掉 -,但保留负数
  38 + text = text.replaceAll("(?<!\\d)-(?![\\d.])|(?<=\\d)-", "");
  39 +
  40 + // ============================
  41 + // 🔥 核心:只保留 中文、英文、数字、小数点、负号、空格
  42 + // 其他所有符号全部清空!
  43 + // ============================
  44 + text = text.replaceAll("[^a-zA-Z0-9\\u4e00-\\u9fa5\\-. ]", "");
  45 +
  46 + // 多余空格变成单个空格(更干净)
  47 + text = text.replaceAll("\\s+", " ").trim();
38 48
39 return text; 49 return text;
40 }catch (Exception e){ 50 }catch (Exception e){
src/main/java/com/xly/web/TTSStreamController.java
@@ -19,6 +19,7 @@ import javax.annotation.PostConstruct; @@ -19,6 +19,7 @@ import javax.annotation.PostConstruct;
19 import javax.annotation.PreDestroy; 19 import javax.annotation.PreDestroy;
20 import java.io.InputStream; 20 import java.io.InputStream;
21 import java.util.List; 21 import java.util.List;
  22 +import java.util.Map;
22 import java.util.concurrent.CompletableFuture; 23 import java.util.concurrent.CompletableFuture;
23 24
24 @Slf4j 25 @Slf4j
@@ -81,16 +82,11 @@ public class TTSStreamController { @@ -81,16 +82,11 @@ public class TTSStreamController {
81 return pythonTtsProxyService.synthesizeStreamAi(request); 82 return pythonTtsProxyService.synthesizeStreamAi(request);
82 } 83 }
83 84
84 - @GetMapping("/audio")  
85 - public ResponseEntity<TTSResponseDTO> getAudio(String cacheKey) {  
86 - if (ObjectUtil.isEmpty(cacheKey)) {  
87 - return ResponseEntity.ok(TTSResponseDTO.builder().code(204).build());  
88 - }  
89 - TTSResponseDTO dto = LocalAudioCache.get(cacheKey);  
90 - if (dto == null) {  
91 - return ResponseEntity.ok(TTSResponseDTO.builder().code(204).build());  
92 - }  
93 - return ResponseEntity.ok(dto); 85 + @GetMapping("/audio/piece")
  86 + public ResponseEntity<Map<String, String>> getPiece(
  87 + @RequestParam String cacheKey,
  88 + @RequestParam int index) {
  89 + return ResponseEntity.ok(LocalAudioCache.getPiece(cacheKey, index));
94 } 90 }
95 91
96 /** 92 /**
src/main/resources/templates/chat.html
@@ -479,8 +479,7 @@ @@ -479,8 +479,7 @@
479 }; 479 };
480 480
481 let chatHistory = []; 481 let chatHistory = [];
482 - let audioQueue = [];  
483 - let isPlaying = false; 482 +
484 let currentModel = 'general'; 483 let currentModel = 'general';
485 const md = window.markdownit({ 484 const md = window.markdownit({
486 html: true, 485 html: true,
@@ -556,9 +555,45 @@ @@ -556,9 +555,45 @@
556 doMessage(input, message, button); 555 doMessage(input, message, button);
557 } 556 }
558 557
// ============================
// Core: fetch and play the audio pieces of one reply strictly in index order (0, 1, 2, ...).
//
// cacheKey     - identifier of the reply, returned by the backend
// currentIndex - zero-based index of the piece to play now
// totalSize    - total number of pieces of the reply
//
// A piece that is not available yet is polled every RETRY_DELAY_MS. After MAX_ATTEMPTS
// unsuccessful polls the piece is skipped, so that a piece the backend never produced
// cannot block the remaining pieces or keep polling forever.
// ============================
async function playByIndex(cacheKey, currentIndex, totalSize) {
    const MAX_ATTEMPTS = 30;     // about 24 seconds per piece at 800 ms
    const RETRY_DELAY_MS = 800;

    // Nothing to play: missing key, missing/invalid piece count, or all pieces done.
    if (!cacheKey || !(totalSize > 0) || currentIndex >= totalSize) return;

    let attempts = 0;
    let advanced = false;

    // Moves on to the following piece exactly once, whichever event fires first.
    function playNext() {
        if (advanced) return;
        advanced = true;
        playByIndex(cacheKey, currentIndex + 1, totalSize);
    }

    function retryOrSkip() {
        if (attempts >= MAX_ATTEMPTS) {
            console.warn('audio piece not available, skipping index', currentIndex);
            playNext();
        } else {
            setTimeout(checkPiece, RETRY_DELAY_MS);
        }
    }

    async function checkPiece() {
        attempts++;

        let piece = null;
        try {
            // cacheKey is encoded because it contains caller supplied data (the user id).
            const url = `${CONFIG.backendUrl}/api/tts/audio/piece`
                + `?cacheKey=${encodeURIComponent(cacheKey)}&index=${currentIndex}`;
            const res = await fetch(url);
            piece = await res.json();
        } catch (e) {
            // Empty/invalid body or network error: treated as "not ready yet".
        }

        if (!piece || !piece.audio) {
            retryOrSkip();
            return;
        }

        try {
            const objectUrl = URL.createObjectURL(base64ToBlob(piece.audio));
            const audio = new Audio(objectUrl);

            // Release the blob URL once this piece is finished, then continue.
            const finish = () => {
                URL.revokeObjectURL(objectUrl);
                playNext();
            };

            audio.onended = finish;

            audio.play().catch(err => {
                console.log('播放异常,自动下一段', err);
                finish();
            });
        } catch (e) {
            // Undecodable audio data: skip this piece instead of stalling playback.
            console.log('播放异常,自动下一段', e);
            playNext();
        }
    }

    checkPiece();
}
  596 +
562 async function doMessage(input, message, button) { 597 async function doMessage(input, message, button) {
563 addMessage(message, 'user'); 598 addMessage(message, 'user');
564 showTypingIndicator(); 599 showTypingIndicator();
@@ -586,36 +621,11 @@ @@ -586,36 +621,11 @@
586 const replyText = (data.processedText || "") + (data.systemText || ""); 621 const replyText = (data.processedText || "") + (data.systemText || "");
587 addMessage(replyText, 'ai'); 622 addMessage(replyText, 'ai');
588 623
589 - // ==============================================  
590 - // 👇 【关键】用 cacheKey 取音频(绝对不串音)  
591 - // ==============================================  
592 const cacheKey = data.cacheKey; 624 const cacheKey = data.cacheKey;
593 - if (!cacheKey) return;  
594 -  
595 - let retry = 0;  
596 - const checkAudio = async () => {  
597 - retry++;  
598 - if (retry > 20) return;  
599 -  
600 - try {  
601 - // ==============================================  
602 - // 👇 用 cacheKey 获取自己的音频(别人拿不到)  
603 - // ==============================================  
604 - const res = await fetch(`${CONFIG.backendUrl}/api/tts/audio?cacheKey=${encodeURIComponent(cacheKey)}`);  
605 - const audioData = await res.json();  
606 -  
607 - if (audioData.audioBase64) {  
608 - const blob = base64ToBlob(audioData.audioBase64);  
609 - const audio = new Audio(URL.createObjectURL(blob));  
610 - audio.play().catch(err => console.log('播放异常', err));  
611 - } else {  
612 - setTimeout(checkAudio, 800);  
613 - }  
614 - } catch (e) {  
615 - setTimeout(checkAudio, 800);  
616 - }  
617 - };  
618 - setTimeout(checkAudio, 1200); 625 + const audioSize = data.audioSize; // 总分几段
  626 +
  627 +
  628 + playByIndex(cacheKey, 0, audioSize);
619 629
620 } catch (error) { 630 } catch (error) {
621 console.error('错误:', error); 631 console.error('错误:', error);
@@ -629,28 +639,6 @@ @@ -629,28 +639,6 @@
629 } 639 }
630 } 640 }
631 641
632 - // ==============================  
633 - // 👇 语音排队播放函数(保证顺序)  
634 - // ==============================  
635 - function playNextAudio() {  
636 - if (isPlaying || audioQueue.length === 0) return;  
637 -  
638 - isPlaying = true;  
639 - const base64 = audioQueue.shift();  
640 - const blob = base64ToBlob(base64);  
641 - const audio = new Audio(URL.createObjectURL(blob));  
642 -  
643 - audio.onended = () => {  
644 - isPlaying = false;  
645 - playNextAudio(); // 播放下一条  
646 - };  
647 -  
648 - audio.play().catch(err => {  
649 - isPlaying = false;  
650 - playNextAudio();  
651 - });  
652 - }  
653 -  
654 function base64ToBlob(base64) { 642 function base64ToBlob(base64) {
655 const byteCharacters = atob(base64); 643 const byteCharacters = atob(base64);
656 const byteNumbers = new Array(byteCharacters.length); 644 const byteNumbers = new Array(byteCharacters.length);