Commit 7e0691c7d5940a1edb8f3fd291e17b6f5d3ca0e9

Authored by yanghl
1 parent 3ea4dac5

语音流式方式处理,按顺序播放。

src/main/java/com/xly/tts/bean/TTSResponseDTO.java
... ... @@ -54,6 +54,7 @@ public class TTSResponseDTO implements Serializable {
54 54 private String audioBase64;
55 55 private Integer audioSize;
56 56 private String audioFormat;
  57 + private String audioText;
57 58  
58 59 // 或者只返回音频URL
59 60 private String audioUrl;
... ...
src/main/java/com/xly/tts/service/LocalAudioCache.java
1 1 package com.xly.tts.service;
2 2  
3 3 import com.xly.tts.bean.TTSResponseDTO;
4   -import java.util.Map;
  4 +import java.util.*;
5 5 import java.util.concurrent.ConcurrentHashMap;
6 6  
/**
 * In-memory store for synthesized speech segments.
 *
 * <p>Each segment is stored under {@code cacheKey + "_" + index} as an immutable map with the
 * keys {@code "text"} (the spoken text) and {@code "audio"} (the Base64-encoded audio).
 *
 * <p>Entries expire {@link #TTL_MILLIS} after they are written. Expired entries are dropped
 * lazily: every write sweeps the map, and every read checks the entry it touches. Without this
 * the map would grow for the lifetime of the JVM, because nothing else removes segments.
 */
public class LocalAudioCache {

    /** How long a segment stays retrievable after being stored (5 minutes). */
    private static final long TTL_MILLIS = 5 * 60 * 1000L;

    /** Segment key ({@code cacheKey_index}) mapped to the stored piece and its expiry time. */
    private static final Map<String, Entry> CACHE = new ConcurrentHashMap<>();

    /** A stored segment together with the wall-clock time at which it expires. */
    private static final class Entry {
        private final Map<String, String> piece;
        private final long expiresAt;

        private Entry(Map<String, String> piece, long expiresAt) {
            this.piece = piece;
            this.expiresAt = expiresAt;
        }
    }

    /**
     * Stores one segment: its text and its Base64 audio.
     *
     * @param cacheKey    key identifying the whole utterance
     * @param index       zero-based position of this segment within the utterance
     * @param text        text of the segment; {@code null} is stored as an empty string
     * @param audioBase64 Base64-encoded audio; when {@code null} nothing is stored
     */
    public static void addPiece(String cacheKey, int index, String text, String audioBase64) {
        if (audioBase64 == null) {
            // Map.of rejects null values, and a piece without audio is of no use to a reader.
            return;
        }
        long now = System.currentTimeMillis();
        // Sweep on write so that abandoned utterances do not accumulate.
        CACHE.values().removeIf(entry -> entry.expiresAt <= now);

        Map<String, String> piece = Map.of("text", text == null ? "" : text, "audio", audioBase64);
        CACHE.put(pieceKey(cacheKey, index), new Entry(piece, now + TTL_MILLIS));
    }

    /**
     * Looks up one segment.
     *
     * @param cacheKey key identifying the whole utterance
     * @param index    zero-based position of the segment
     * @return the immutable {@code {"text", "audio"}} map, or {@code null} when the segment has
     *         not been stored or has expired
     */
    public static Map<String, String> getPiece(String cacheKey, int index) {
        String key = pieceKey(cacheKey, index);
        Entry entry = CACHE.get(key);
        if (entry == null) {
            return null;
        }
        if (entry.expiresAt <= System.currentTimeMillis()) {
            // Remove only this exact entry; a concurrent fresh write under the same key survives.
            CACHE.remove(key, entry);
            return null;
        }
        return entry.piece;
    }

    /** Builds the map key for a segment. */
    private static String pieceKey(String cacheKey, int index) {
        return cacheKey + "_" + index;
    }
}
25 23 \ No newline at end of file
... ...
src/main/java/com/xly/tts/service/PythonTtsProxyService.java
... ... @@ -18,15 +18,13 @@ import org.springframework.stereotype.Service;
18 18 import org.springframework.web.client.RestTemplate;
19 19  
20 20 import javax.annotation.PostConstruct;
21   -import java.io.ByteArrayInputStream;
22   -import java.io.InputStream;
  21 +import java.io.*;
23 22 import java.util.*;
24 23 import java.util.concurrent.CompletableFuture;
25 24 import java.util.concurrent.ExecutorService;
26 25 import java.util.concurrent.Executors;
27 26 import java.net.URL;
28 27 import java.net.HttpURLConnection;
29   -import java.io.OutputStream;
30 28 import java.io.InputStream;
31 29  
32 30 @Slf4j
... ... @@ -135,11 +133,18 @@ public class PythonTtsProxyService {
135 133 }
136 134 String voiceTextNew = AdvancedSymbolRemover.removePunctuationHtml(aiText);
137 135  
  136 + // ============================
  137 + // 【绝对唯一】不会重复、不会覆盖
  138 + // ============================
  139 + String cacheKey = request.getUserid() + "_" + System.nanoTime();
  140 +
138 141 TTSResponseDTO dto = TTSResponseDTO.builder()
139 142 .code(200)
140 143 .message("success")
  144 + .cacheKey(cacheKey) // 前端靠这个取自己的分段
141 145 .originalText(request.getText())
142 146 .processedText(aiText)
  147 + .audioText(voiceTextNew)
143 148 .systemText(systemText)
144 149 .voice(request.getVoice())
145 150 .sSceneName(aiResponseDTO.getSSceneName())
... ... @@ -155,52 +160,101 @@ public class PythonTtsProxyService {
155 160 return ResponseEntity.ok(dto);
156 161 }
157 162  
158   - // ==============================================
159   - // 👇 【关键】生成 全局唯一的 key(多用户不冲突)
160   - // ==============================================
161   - String cacheKey = request.getUserid() + "_" + System.currentTimeMillis() + "_" + request.getText();
162   -
  163 + // 平均分割文字
  164 + List<String> textParts = splitTextSmart(voiceTextNew, 30);
  165 + dto.setAudioSize(textParts.size());
  166 + // 异步分段合成
163 167 CompletableFuture.runAsync(() -> {
164   - try {
165   - Map<String, Object> params = new HashMap<>();
166   - params.put("text", voiceTextNew);
167   - params.put("voice", request.getVoice());
168   - params.put("rate", request.getRate() != null ? request.getRate() : "+10%");
169   - params.put("volume", request.getVolume() != null ? request.getVolume() : "+0%");
170   -
171   - HttpHeaders headers = new HttpHeaders();
172   - headers.setContentType(MediaType.APPLICATION_JSON);
173   - headers.setAccept(Collections.singletonList(MediaType.APPLICATION_OCTET_STREAM));
174   - HttpEntity<Map<String, Object>> entity = new HttpEntity<>(params, headers);
175   -
176   - ResponseEntity<byte[]> response = restTemplate.exchange(
177   - pythonServiceUrl + "/stream-synthesize",
178   - HttpMethod.POST, entity, byte[].class
179   - );
180   -
181   - if (response.getStatusCode().is2xxSuccessful() && response.getBody() != null) {
182   - dto.setAudioBase64(Base64.getEncoder().encodeToString(response.getBody()));
183   - dto.setAudioSize(response.getBody().length);
184   - dto.setAudioFormat("audio/mpeg");
185   -
186   - // ==============================================
187   - // 👇 用唯一key存(不覆盖别人)
188   - // ==============================================
189   - LocalAudioCache.put(cacheKey, dto);
  168 + for (int i = 0; i < textParts.size(); i++) {
  169 + String part = textParts.get(i);
  170 + if (ObjectUtil.isEmpty(part)) continue;
  171 +
  172 + try {
  173 + Map<String, Object> params = new HashMap<>();
  174 + params.put("text", part);
  175 + params.put("voice", request.getVoice());
  176 + params.put("rate", request.getRate() != null ? request.getRate() : "+10%");
  177 + params.put("volume", request.getVolume() != null ? request.getVolume() : "+0%");
  178 +
  179 + HttpHeaders headers = new HttpHeaders();
  180 + headers.setContentType(MediaType.APPLICATION_JSON);
  181 + headers.setAccept(Collections.singletonList(MediaType.APPLICATION_OCTET_STREAM));
  182 + HttpEntity<Map<String, Object>> entity = new HttpEntity<>(params, headers);
  183 +
  184 + ResponseEntity<byte[]> response = restTemplate.exchange(
  185 + pythonServiceUrl + "/stream-synthesize",
  186 + HttpMethod.POST, entity, byte[].class
  187 + );
  188 +
  189 + if (response.getStatusCode().is2xxSuccessful() && response.getBody() != null) {
  190 + String base64 = Base64.getEncoder().encodeToString(response.getBody());
  191 +
  192 + // ============================
  193 + // 【关键】带序号存储!前端靠序号知道顺序!
  194 + // ============================
  195 + LocalAudioCache.addPiece(cacheKey, i, part, base64);
  196 + }
  197 + } catch (Exception e) {
  198 + log.warn("分段合成失败: {}", e.getMessage());
190 199 }
191   - } catch (Exception e) {
192   - log.warn("语音合成忽略:{}", e.getMessage());
193 200 }
194 201 }, executorService);
195 202  
196   - // ==============================================
197   - // 👇 把 cacheKey 返回给前端(前端靠它取音频)
198   - // ==============================================
199   - dto.setCacheKey(cacheKey);
200   -
201 203 return ResponseEntity.ok(dto);
202 204 }
203 205  
  206 + // ==============================================
  207 +// 智能分段:优先按 。!?; , 空格 断开
  208 +// 不会把一句话生硬切断,更自然
  209 +// ==============================================
  210 + private List<String> splitTextSmart(String text, int maxLength) {
  211 + List<String> parts = new ArrayList<>();
  212 + if (text == null || text.isEmpty()) return parts;
  213 +
  214 + int len = text.length();
  215 + int start = 0;
  216 +
  217 + while (start < len) {
  218 + int end = Math.min(start + maxLength, len);
  219 +
  220 + // 如果不是最后一段,寻找最近的断句点
  221 + if (end < len) {
  222 + // 优先按 。!?; 断句
  223 + int splitPos = lastIndexOfAny(text, start, end, '。', '!', '?', ';');
  224 + if (splitPos == -1) {
  225 + // 其次按 , 逗号
  226 + splitPos = lastIndexOfAny(text, start, end, ',');
  227 + }
  228 + if (splitPos == -1) {
  229 + // 最后按空格
  230 + splitPos = lastIndexOfAny(text, start, end, ' ');
  231 + }
  232 + if (splitPos != -1) {
  233 + end = splitPos + 1;
  234 + }
  235 + }
  236 +
  237 + String part = text.substring(start, end).trim();
  238 + if (!part.isEmpty()) {
  239 + parts.add(part);
  240 + }
  241 + start = end;
  242 + }
  243 + return parts;
  244 + }
  245 +
  246 + // 工具:查找最后出现的符号
  247 + private int lastIndexOfAny(String text, int start, int end, char... chars) {
  248 + for (int i = end - 1; i >= start; i--) {
  249 + for (char c : chars) {
  250 + if (text.charAt(i) == c) {
  251 + return i;
  252 + }
  253 + }
  254 + }
  255 + return -1;
  256 + }
  257 +
204 258 public ResponseEntity<InputStreamResource> getVoiceResult(TTSRequestDTO request) {
205 259 try {
206 260 String voiceText = AdvancedSymbolRemover.removePunctuationHtml(request.getText());
... ...
src/main/java/com/xly/util/AdvancedSymbolRemover.java
... ... @@ -15,26 +15,36 @@ public class AdvancedSymbolRemover {
15 15  
16 16  
17 17 /**
18   - * 移除所有标点符号(保留字母、数字、中文
  18 + * 移除所有符号(仅保留中文、英文字母、数字、小数点、负号和空格)
19 19 */
20 20 public static String removePunctuationHtml(String text) {
21 21 try{
22 22 if (text == null || text.isEmpty()) return "";
23 23 text = HtmlCleaner.cleanHtml(text);
24 24  
25   -
26 25 text = text.replaceAll("br", "");
27 26 text = text.replaceAll("<br/>", "");
28 27 text = text.replaceAll("</div>", "");
29 28 text = text.replaceAll("<div>", "");
30 29 text = text.replaceAll("&emsp;", "");
31   - // 👇 【安全正则】只删除 数字后面的 .0 或 .00
  30 +
  31 + // 去掉数字末尾无用的 .0 .00
32 32 text = text.replaceAll("(?<=\\d)\\.0+(?!\\d)", "");
33   - // 移除中文和英文标点
34   - text = text.replaceAll("[\\pP\\p{Punct}]", "");
35 33  
36   - // 可选:只保留字母、数字、汉字、空格
37   - text = text.replaceAll("[^\\p{L}\\p{N}\\p{Zs}]", "");
  34 + // 去掉无用文字
  35 + text = text.replaceAll("换一换", "");
  36 +
  37 + // 去掉 -,但保留负数
  38 + text = text.replaceAll("(?<!\\d)-(?![\\d.])|(?<=\\d)-", "");
  39 +
  40 + // ============================
  41 + // 🔥 核心:只保留 中文、英文、数字、小数点、负号、空格
  42 + // 其他所有符号全部清空!
  43 + // ============================
  44 + text = text.replaceAll("[^a-zA-Z0-9\\u4e00-\\u9fa5\\-. ]", "");
  45 +
  46 + // 多余空格变成单个空格(更干净)
  47 + text = text.replaceAll("\\s+", " ").trim();
38 48  
39 49 return text;
40 50 }catch (Exception e){
... ...
src/main/java/com/xly/web/TTSStreamController.java
... ... @@ -19,6 +19,7 @@ import javax.annotation.PostConstruct;
19 19 import javax.annotation.PreDestroy;
20 20 import java.io.InputStream;
21 21 import java.util.List;
  22 +import java.util.Map;
22 23 import java.util.concurrent.CompletableFuture;
23 24  
24 25 @Slf4j
... ... @@ -81,16 +82,11 @@ public class TTSStreamController {
81 82 return pythonTtsProxyService.synthesizeStreamAi(request);
82 83 }
83 84  
84   - @GetMapping("/audio")
85   - public ResponseEntity<TTSResponseDTO> getAudio(String cacheKey) {
86   - if (ObjectUtil.isEmpty(cacheKey)) {
87   - return ResponseEntity.ok(TTSResponseDTO.builder().code(204).build());
88   - }
89   - TTSResponseDTO dto = LocalAudioCache.get(cacheKey);
90   - if (dto == null) {
91   - return ResponseEntity.ok(TTSResponseDTO.builder().code(204).build());
92   - }
93   - return ResponseEntity.ok(dto);
  85 + @GetMapping("/audio/piece")
  86 + public ResponseEntity<Map<String, String>> getPiece(
  87 + @RequestParam String cacheKey,
  88 + @RequestParam int index) {
  89 + return ResponseEntity.ok(LocalAudioCache.getPiece(cacheKey, index));
94 90 }
95 91  
96 92 /**
... ...
src/main/resources/templates/chat.html
... ... @@ -479,8 +479,7 @@
479 479 };
480 480  
481 481 let chatHistory = [];
482   - let audioQueue = [];
483   - let isPlaying = false;
  482 +
484 483 let currentModel = 'general';
485 484 const md = window.markdownit({
486 485 html: true,
... ... @@ -556,9 +555,45 @@
556 555 doMessage(input, message, button);
557 556 }
558 557  
559   - // ======================
560   - // 🔥 已修复:完整 fetch 流式交互
561   - // ======================
  558 + // ============================
  559 + // 核心:按序号 0,1,2... 顺序获取 + 播放
  560 + // ===========================
  561 + async function playByIndex(cacheKey, currentIndex, totalSize) {
  562 + if (currentIndex >= totalSize) return;
  563 +
  564 + async function checkPiece() {
  565 + try {
  566 + // 你原来的 fetch 写法 100% 保留
  567 + const res = await fetch(`${CONFIG.backendUrl}/api/tts/audio/piece?cacheKey=${cacheKey}&index=${currentIndex}`);
  568 + const piece = await res.json();
  569 +
  570 + if (piece && piece.audio) {
  571 + // 你原来的 base64 播放方式
  572 + const blob = base64ToBlob(piece.audio);
  573 + const audio = new Audio(URL.createObjectURL(blob));
  574 +
  575 + audio.onended = () => {
  576 + // 自动播放下一段
  577 + playByIndex(cacheKey, currentIndex + 1, totalSize);
  578 + };
  579 +
  580 + audio.play().catch(err => {
  581 + console.log('播放异常,自动下一段', err);
  582 + playByIndex(cacheKey, currentIndex + 1, totalSize);
  583 + });
  584 +
  585 + } else {
  586 + // 没获取到,等待再取(你原来的 800ms)
  587 + setTimeout(checkPiece, 800);
  588 + }
  589 + } catch (e) {
  590 + setTimeout(checkPiece, 800);
  591 + }
  592 + }
  593 +
  594 + checkPiece();
  595 + }
  596 +
562 597 async function doMessage(input, message, button) {
563 598 addMessage(message, 'user');
564 599 showTypingIndicator();
... ... @@ -586,36 +621,11 @@
586 621 const replyText = (data.processedText || "") + (data.systemText || "");
587 622 addMessage(replyText, 'ai');
588 623  
589   - // ==============================================
590   - // 👇 【关键】用 cacheKey 取音频(绝对不串音)
591   - // ==============================================
592 624 const cacheKey = data.cacheKey;
593   - if (!cacheKey) return;
594   -
595   - let retry = 0;
596   - const checkAudio = async () => {
597   - retry++;
598   - if (retry > 20) return;
599   -
600   - try {
601   - // ==============================================
602   - // 👇 用 cacheKey 获取自己的音频(别人拿不到)
603   - // ==============================================
604   - const res = await fetch(`${CONFIG.backendUrl}/api/tts/audio?cacheKey=${encodeURIComponent(cacheKey)}`);
605   - const audioData = await res.json();
606   -
607   - if (audioData.audioBase64) {
608   - const blob = base64ToBlob(audioData.audioBase64);
609   - const audio = new Audio(URL.createObjectURL(blob));
610   - audio.play().catch(err => console.log('播放异常', err));
611   - } else {
612   - setTimeout(checkAudio, 800);
613   - }
614   - } catch (e) {
615   - setTimeout(checkAudio, 800);
616   - }
617   - };
618   - setTimeout(checkAudio, 1200);
  625 + const audioSize = data.audioSize; // 总分几段
  626 +
  627 +
  628 + playByIndex(cacheKey, 0, audioSize);
619 629  
620 630 } catch (error) {
621 631 console.error('错误:', error);
... ... @@ -629,28 +639,6 @@
629 639 }
630 640 }
631 641  
632   - // ==============================
633   - // 👇 语音排队播放函数(保证顺序)
634   - // ==============================
635   - function playNextAudio() {
636   - if (isPlaying || audioQueue.length === 0) return;
637   -
638   - isPlaying = true;
639   - const base64 = audioQueue.shift();
640   - const blob = base64ToBlob(base64);
641   - const audio = new Audio(URL.createObjectURL(blob));
642   -
643   - audio.onended = () => {
644   - isPlaying = false;
645   - playNextAudio(); // 播放下一条
646   - };
647   -
648   - audio.play().catch(err => {
649   - isPlaying = false;
650   - playNextAudio();
651   - });
652   - }
653   -
654 642 function base64ToBlob(base64) {
655 643 const byteCharacters = atob(base64);
656 644 const byteNumbers = new Array(byteCharacters.length);
... ...