Commit 7e0691c7d5940a1edb8f3fd291e17b6f5d3ca0e9
1 parent
3ea4dac5
语音流式方式处理,按顺序播放。
Showing
6 changed files
with
174 additions
and
127 deletions
src/main/java/com/xly/tts/bean/TTSResponseDTO.java
src/main/java/com/xly/tts/service/LocalAudioCache.java
| 1 | 1 | package com.xly.tts.service; |
| 2 | 2 | |
| 3 | 3 | import com.xly.tts.bean.TTSResponseDTO; |
| 4 | -import java.util.Map; | |
| 4 | +import java.util.*; | |
| 5 | 5 | import java.util.concurrent.ConcurrentHashMap; |
| 6 | 6 | |
/**
 * In-memory store for synthesized audio pieces, addressed by a request-level
 * cache key plus the zero-based index of the piece inside that request.
 *
 * <p>Each stored piece is an immutable map with two entries: {@code "text"}
 * (the text fragment) and {@code "audio"} (the Base64-encoded audio).
 *
 * <p>Pieces expire {@link #TTL_MILLIS} after they were stored. Without an
 * expiry every synthesized piece would stay in this static map for the whole
 * lifetime of the JVM. Expired pieces are dropped lazily: on read of that
 * piece, and by a throttled sweep that runs as part of {@code addPiece}.
 */
public class LocalAudioCache {

    /** How long a piece stays retrievable after it was stored. */
    private static final long TTL_MILLIS = 30L * 60L * 1000L;

    /** Minimum gap between two full sweeps, so that writes stay cheap. */
    private static final long SWEEP_INTERVAL_MILLIS = 60L * 1000L;

    /** Internal layout: {@code cacheKey_index -> entry}. */
    private static final Map<String, Entry> CACHE = new ConcurrentHashMap<>();

    /** Time of the last full sweep; a racy update only causes an extra sweep. */
    private static volatile long lastSweepMillis = 0L;

    /**
     * Stores one text fragment together with its audio.
     *
     * @param cacheKey    request-level key handed out to the client
     * @param index       zero-based position of the piece within the request
     * @param text        text fragment that was synthesized; must not be null
     * @param audioBase64 Base64-encoded audio of the fragment; must not be null
     * @throws NullPointerException if {@code text} or {@code audioBase64} is null
     */
    public static void addPiece(String cacheKey, int index, String text, String audioBase64) {
        addPiece(cacheKey, index, text, audioBase64, System.currentTimeMillis());
    }

    /**
     * Looks up one stored piece.
     *
     * @param cacheKey request-level key handed out to the client
     * @param index    zero-based position of the piece within the request
     * @return a map with the keys {@code "text"} and {@code "audio"}, or
     *         {@code null} when the piece was never stored or has expired
     */
    public static Map<String, String> getPiece(String cacheKey, int index) {
        return getPiece(cacheKey, index, System.currentTimeMillis());
    }

    /** Same as the public {@code addPiece}, with an explicit clock value (for tests). */
    static void addPiece(String cacheKey, int index, String text, String audioBase64, long nowMillis) {
        sweepIfDue(nowMillis);
        Map<String, String> piece = Map.of("text", text, "audio", audioBase64);
        CACHE.put(keyOf(cacheKey, index), new Entry(piece, nowMillis + TTL_MILLIS));
    }

    /** Same as the public {@code getPiece}, with an explicit clock value (for tests). */
    static Map<String, String> getPiece(String cacheKey, int index, long nowMillis) {
        String key = keyOf(cacheKey, index);
        Entry entry = CACHE.get(key);
        if (entry == null) {
            return null;
        }
        if (entry.isExpired(nowMillis)) {
            // Two-argument remove: do not drop a fresh entry stored concurrently.
            CACHE.remove(key, entry);
            return null;
        }
        return entry.piece;
    }

    /** Number of entries currently held, expired or not (for tests and diagnostics). */
    static int size() {
        return CACHE.size();
    }

    private static String keyOf(String cacheKey, int index) {
        return cacheKey + "_" + index;
    }

    /** Removes every expired entry, at most once per {@link #SWEEP_INTERVAL_MILLIS}. */
    private static void sweepIfDue(long nowMillis) {
        if (nowMillis - lastSweepMillis < SWEEP_INTERVAL_MILLIS) {
            return;
        }
        lastSweepMillis = nowMillis;
        CACHE.values().removeIf(entry -> entry.isExpired(nowMillis));
    }

    /** A stored piece together with the instant after which it is no longer served. */
    private static final class Entry {
        private final Map<String, String> piece;
        private final long expiresAtMillis;

        private Entry(Map<String, String> piece, long expiresAtMillis) {
            this.piece = piece;
            this.expiresAtMillis = expiresAtMillis;
        }

        private boolean isExpired(long nowMillis) {
            return nowMillis >= expiresAtMillis;
        }
    }
}
| 25 | 23 | \ No newline at end of file | ... | ... |
src/main/java/com/xly/tts/service/PythonTtsProxyService.java
| ... | ... | @@ -18,15 +18,13 @@ import org.springframework.stereotype.Service; |
| 18 | 18 | import org.springframework.web.client.RestTemplate; |
| 19 | 19 | |
| 20 | 20 | import javax.annotation.PostConstruct; |
| 21 | -import java.io.ByteArrayInputStream; | |
| 22 | -import java.io.InputStream; | |
| 21 | +import java.io.*; | |
| 23 | 22 | import java.util.*; |
| 24 | 23 | import java.util.concurrent.CompletableFuture; |
| 25 | 24 | import java.util.concurrent.ExecutorService; |
| 26 | 25 | import java.util.concurrent.Executors; |
| 27 | 26 | import java.net.URL; |
| 28 | 27 | import java.net.HttpURLConnection; |
| 29 | -import java.io.OutputStream; | |
| 30 | 28 | import java.io.InputStream; |
| 31 | 29 | |
| 32 | 30 | @Slf4j |
| ... | ... | @@ -135,11 +133,18 @@ public class PythonTtsProxyService { |
| 135 | 133 | } |
| 136 | 134 | String voiceTextNew = AdvancedSymbolRemover.removePunctuationHtml(aiText); |
| 137 | 135 | |
| 136 | + // ============================ | |
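| 137 | + // Cache key is practically unique per request (user id + System.nanoTime()); this is not a hard uniqueness guarantee. | |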
| 138 | + // ============================ | |
| 139 | + String cacheKey = request.getUserid() + "_" + System.nanoTime(); | |
| 140 | + | |
| 138 | 141 | TTSResponseDTO dto = TTSResponseDTO.builder() |
| 139 | 142 | .code(200) |
| 140 | 143 | .message("success") |
| 144 | + .cacheKey(cacheKey) // 前端靠这个取自己的分段 | |
| 141 | 145 | .originalText(request.getText()) |
| 142 | 146 | .processedText(aiText) |
| 147 | + .audioText(voiceTextNew) | |
| 143 | 148 | .systemText(systemText) |
| 144 | 149 | .voice(request.getVoice()) |
| 145 | 150 | .sSceneName(aiResponseDTO.getSSceneName()) |
| ... | ... | @@ -155,52 +160,101 @@ public class PythonTtsProxyService { |
| 155 | 160 | return ResponseEntity.ok(dto); |
| 156 | 161 | } |
| 157 | 162 | |
| 158 | - // ============================================== | |
| 159 | - // 👇 【关键】生成 全局唯一的 key(多用户不冲突) | |
| 160 | - // ============================================== | |
| 161 | - String cacheKey = request.getUserid() + "_" + System.currentTimeMillis() + "_" + request.getText(); | |
| 162 | - | |
| 163 | + // 平均分割文字 | |
| 164 | + List<String> textParts = splitTextSmart(voiceTextNew, 30); | |
| 165 | + dto.setAudioSize(textParts.size()); | |
| 166 | + // 异步分段合成 | |
| 163 | 167 | CompletableFuture.runAsync(() -> { |
| 164 | - try { | |
| 165 | - Map<String, Object> params = new HashMap<>(); | |
| 166 | - params.put("text", voiceTextNew); | |
| 167 | - params.put("voice", request.getVoice()); | |
| 168 | - params.put("rate", request.getRate() != null ? request.getRate() : "+10%"); | |
| 169 | - params.put("volume", request.getVolume() != null ? request.getVolume() : "+0%"); | |
| 170 | - | |
| 171 | - HttpHeaders headers = new HttpHeaders(); | |
| 172 | - headers.setContentType(MediaType.APPLICATION_JSON); | |
| 173 | - headers.setAccept(Collections.singletonList(MediaType.APPLICATION_OCTET_STREAM)); | |
| 174 | - HttpEntity<Map<String, Object>> entity = new HttpEntity<>(params, headers); | |
| 175 | - | |
| 176 | - ResponseEntity<byte[]> response = restTemplate.exchange( | |
| 177 | - pythonServiceUrl + "/stream-synthesize", | |
| 178 | - HttpMethod.POST, entity, byte[].class | |
| 179 | - ); | |
| 180 | - | |
| 181 | - if (response.getStatusCode().is2xxSuccessful() && response.getBody() != null) { | |
| 182 | - dto.setAudioBase64(Base64.getEncoder().encodeToString(response.getBody())); | |
| 183 | - dto.setAudioSize(response.getBody().length); | |
| 184 | - dto.setAudioFormat("audio/mpeg"); | |
| 185 | - | |
| 186 | - // ============================================== | |
| 187 | - // 👇 用唯一key存(不覆盖别人) | |
| 188 | - // ============================================== | |
| 189 | - LocalAudioCache.put(cacheKey, dto); | |
| 168 | + for (int i = 0; i < textParts.size(); i++) { | |
| 169 | + String part = textParts.get(i); | |
| 170 | + if (ObjectUtil.isEmpty(part)) continue; | |
| 171 | + | |
| 172 | + try { | |
| 173 | + Map<String, Object> params = new HashMap<>(); | |
| 174 | + params.put("text", part); | |
| 175 | + params.put("voice", request.getVoice()); | |
| 176 | + params.put("rate", request.getRate() != null ? request.getRate() : "+10%"); | |
| 177 | + params.put("volume", request.getVolume() != null ? request.getVolume() : "+0%"); | |
| 178 | + | |
| 179 | + HttpHeaders headers = new HttpHeaders(); | |
| 180 | + headers.setContentType(MediaType.APPLICATION_JSON); | |
| 181 | + headers.setAccept(Collections.singletonList(MediaType.APPLICATION_OCTET_STREAM)); | |
| 182 | + HttpEntity<Map<String, Object>> entity = new HttpEntity<>(params, headers); | |
| 183 | + | |
| 184 | + ResponseEntity<byte[]> response = restTemplate.exchange( | |
| 185 | + pythonServiceUrl + "/stream-synthesize", | |
| 186 | + HttpMethod.POST, entity, byte[].class | |
| 187 | + ); | |
| 188 | + | |
| 189 | + if (response.getStatusCode().is2xxSuccessful() && response.getBody() != null) { | |
| 190 | + String base64 = Base64.getEncoder().encodeToString(response.getBody()); | |
| 191 | + | |
| 192 | + // ============================ | |
| 193 | + // 【关键】带序号存储!前端靠序号知道顺序! | |
| 194 | + // ============================ | |
| 195 | + LocalAudioCache.addPiece(cacheKey, i, part, base64); | |
| 196 | + } | |
| 197 | + } catch (Exception e) { | |
| 198 | + log.warn("分段合成失败: {}", e.getMessage()); | |
| 190 | 199 | } |
| 191 | - } catch (Exception e) { | |
| 192 | - log.warn("语音合成忽略:{}", e.getMessage()); | |
| 193 | 200 | } |
| 194 | 201 | }, executorService); |
| 195 | 202 | |
| 196 | - // ============================================== | |
| 197 | - // 👇 把 cacheKey 返回给前端(前端靠它取音频) | |
| 198 | - // ============================================== | |
| 199 | - dto.setCacheKey(cacheKey); | |
| 200 | - | |
| 201 | 203 | return ResponseEntity.ok(dto); |
| 202 | 204 | } |
| 203 | 205 | |
| 206 | + // ============================================== | |
| 207 | +// 智能分段:优先按 。!?; , 空格 断开 | |
| 208 | +// 不会把一句话生硬切断,更自然 | |
| 209 | +// ============================================== | |
| 210 | + private List<String> splitTextSmart(String text, int maxLength) { | |
| 211 | + List<String> parts = new ArrayList<>(); | |
| 212 | + if (text == null || text.isEmpty()) return parts; | |
| 213 | + | |
| 214 | + int len = text.length(); | |
| 215 | + int start = 0; | |
| 216 | + | |
| 217 | + while (start < len) { | |
| 218 | + int end = Math.min(start + maxLength, len); | |
| 219 | + | |
| 220 | + // 如果不是最后一段,寻找最近的断句点 | |
| 221 | + if (end < len) { | |
| 222 | + // 优先按 。!?; 断句 | |
| 223 | + int splitPos = lastIndexOfAny(text, start, end, '。', '!', '?', ';'); | |
| 224 | + if (splitPos == -1) { | |
| 225 | + // 其次按 , 逗号 | |
| 226 | + splitPos = lastIndexOfAny(text, start, end, ','); | |
| 227 | + } | |
| 228 | + if (splitPos == -1) { | |
| 229 | + // 最后按空格 | |
| 230 | + splitPos = lastIndexOfAny(text, start, end, ' '); | |
| 231 | + } | |
| 232 | + if (splitPos != -1) { | |
| 233 | + end = splitPos + 1; | |
| 234 | + } | |
| 235 | + } | |
| 236 | + | |
| 237 | + String part = text.substring(start, end).trim(); | |
| 238 | + if (!part.isEmpty()) { | |
| 239 | + parts.add(part); | |
| 240 | + } | |
| 241 | + start = end; | |
| 242 | + } | |
| 243 | + return parts; | |
| 244 | + } | |
| 245 | + | |
| 246 | + // 工具:查找最后出现的符号 | |
| 247 | + private int lastIndexOfAny(String text, int start, int end, char... chars) { | |
| 248 | + for (int i = end - 1; i >= start; i--) { | |
| 249 | + for (char c : chars) { | |
| 250 | + if (text.charAt(i) == c) { | |
| 251 | + return i; | |
| 252 | + } | |
| 253 | + } | |
| 254 | + } | |
| 255 | + return -1; | |
| 256 | + } | |
| 257 | + | |
| 204 | 258 | public ResponseEntity<InputStreamResource> getVoiceResult(TTSRequestDTO request) { |
| 205 | 259 | try { |
| 206 | 260 | String voiceText = AdvancedSymbolRemover.removePunctuationHtml(request.getText()); | ... | ... |
src/main/java/com/xly/util/AdvancedSymbolRemover.java
| ... | ... | @@ -15,26 +15,36 @@ public class AdvancedSymbolRemover { |
| 15 | 15 | |
| 16 | 16 | |
| 17 | 17 | /** |
| 18 | - * 移除所有标点符号(保留字母、数字、中文) | |
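| 18 | + * Strips HTML and symbols, keeping only ASCII letters, digits, Chinese characters (U+4E00-U+9FA5), '.', '-' and single spaces; all other punctuation is removed. | |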
| 19 | 19 | */ |
| 20 | 20 | public static String removePunctuationHtml(String text) { |
| 21 | 21 | try{ |
| 22 | 22 | if (text == null || text.isEmpty()) return ""; |
| 23 | 23 | text = HtmlCleaner.cleanHtml(text); |
| 24 | 24 | |
| 25 | - | |
| 26 | 25 | text = text.replaceAll("br", ""); |
| 27 | 26 | text = text.replaceAll("<br/>", ""); |
| 28 | 27 | text = text.replaceAll("</div>", ""); |
| 29 | 28 | text = text.replaceAll("<div>", ""); |
| 30 | 29 | text = text.replaceAll(" ", ""); |
| 31 | - // 👇 【安全正则】只删除 数字后面的 .0 或 .00 | |
| 30 | + | |
| 31 | + // 去掉数字末尾无用的 .0 .00 | |
| 32 | 32 | text = text.replaceAll("(?<=\\d)\\.0+(?!\\d)", ""); |
| 33 | - // 移除中文和英文标点 | |
| 34 | - text = text.replaceAll("[\\pP\\p{Punct}]", ""); | |
| 35 | 33 | |
| 36 | - // 可选:只保留字母、数字、汉字、空格 | |
| 37 | - text = text.replaceAll("[^\\p{L}\\p{N}\\p{Zs}]", ""); | |
| 34 | + // 去掉无用文字 | |
| 35 | + text = text.replaceAll("换一换", ""); | |
| 36 | + | |
| 37 | + // 去掉 -,但保留负数 | |
| 38 | + text = text.replaceAll("(?<!\\d)-(?![\\d.])|(?<=\\d)-", ""); | |
| 39 | + | |
| 40 | + // ============================ | |
| 41 | + // 🔥 核心:只保留 中文、英文、数字、小数点、负号、空格 | |
| 42 | + // 其他所有符号全部清空! | |
| 43 | + // ============================ | |
| 44 | + text = text.replaceAll("[^a-zA-Z0-9\\u4e00-\\u9fa5\\-. ]", ""); | |
| 45 | + | |
| 46 | + // 多余空格变成单个空格(更干净) | |
| 47 | + text = text.replaceAll("\\s+", " ").trim(); | |
| 38 | 48 | |
| 39 | 49 | return text; |
| 40 | 50 | }catch (Exception e){ | ... | ... |
src/main/java/com/xly/web/TTSStreamController.java
| ... | ... | @@ -19,6 +19,7 @@ import javax.annotation.PostConstruct; |
| 19 | 19 | import javax.annotation.PreDestroy; |
| 20 | 20 | import java.io.InputStream; |
| 21 | 21 | import java.util.List; |
| 22 | +import java.util.Map; | |
| 22 | 23 | import java.util.concurrent.CompletableFuture; |
| 23 | 24 | |
| 24 | 25 | @Slf4j |
| ... | ... | @@ -81,16 +82,11 @@ public class TTSStreamController { |
| 81 | 82 | return pythonTtsProxyService.synthesizeStreamAi(request); |
| 82 | 83 | } |
| 83 | 84 | |
| 84 | - @GetMapping("/audio") | |
| 85 | - public ResponseEntity<TTSResponseDTO> getAudio(String cacheKey) { | |
| 86 | - if (ObjectUtil.isEmpty(cacheKey)) { | |
| 87 | - return ResponseEntity.ok(TTSResponseDTO.builder().code(204).build()); | |
| 88 | - } | |
| 89 | - TTSResponseDTO dto = LocalAudioCache.get(cacheKey); | |
| 90 | - if (dto == null) { | |
| 91 | - return ResponseEntity.ok(TTSResponseDTO.builder().code(204).build()); | |
| 92 | - } | |
| 93 | - return ResponseEntity.ok(dto); | |
| 85 | + @GetMapping("/audio/piece") | |
| 86 | + public ResponseEntity<Map<String, String>> getPiece( | |
| 87 | + @RequestParam String cacheKey, | |
| 88 | + @RequestParam int index) { | |
| 89 | + return ResponseEntity.ok(LocalAudioCache.getPiece(cacheKey, index)); | |
| 94 | 90 | } |
| 95 | 91 | |
| 96 | 92 | /** | ... | ... |
src/main/resources/templates/chat.html
| ... | ... | @@ -479,8 +479,7 @@ |
| 479 | 479 | }; |
| 480 | 480 | |
| 481 | 481 | let chatHistory = []; |
| 482 | - let audioQueue = []; | |
| 483 | - let isPlaying = false; | |
| 482 | + | |
| 484 | 483 | let currentModel = 'general'; |
| 485 | 484 | const md = window.markdownit({ |
| 486 | 485 | html: true, |
| ... | ... | @@ -556,9 +555,45 @@ |
| 556 | 555 | doMessage(input, message, button); |
| 557 | 556 | } |
| 558 | 557 | |
| 559 | - // ====================== | |
| 560 | - // 🔥 已修复:完整 fetch 流式交互 | |
| 561 | - // ====================== | |
| 558 | + // ============================ | |
| 559 | + // 核心:按序号 0,1,2... 顺序获取 + 播放 | |
| 560 | + // =========================== | |
/**
 * Plays the audio pieces of one TTS reply strictly in index order.
 *
 * Polls the backend for piece `currentIndex`, plays it once it is available,
 * then moves on to `currentIndex + 1` until `totalSize` is reached.
 *
 * A piece that never shows up (for example because its synthesis failed on
 * the server) is skipped after MAX_ATTEMPTS polls, so one missing piece
 * cannot block the rest of the reply or keep the page polling forever.
 *
 * @param {string} cacheKey     key returned by the backend for this reply
 * @param {number} currentIndex zero-based index of the piece to play next
 * @param {number} totalSize    total number of pieces in this reply
 */
async function playByIndex(cacheKey, currentIndex, totalSize) {
    const POLL_INTERVAL_MS = 800;
    const MAX_ATTEMPTS = 30;

    // Written as a negated "<" so that a missing or NaN totalSize stops here
    // instead of polling without end.
    if (!cacheKey || !(currentIndex < totalSize)) return;

    const playNext = () => playByIndex(cacheKey, currentIndex + 1, totalSize);
    // The cache key contains the user id, so it must be URL-encoded.
    const url = `${CONFIG.backendUrl}/api/tts/audio/piece`
        + `?cacheKey=${encodeURIComponent(cacheKey)}&index=${currentIndex}`;
    let attempts = 0;

    function playPiece(audioBase64) {
        let objectUrl = null;
        let advanced = false;
        // Runs at most once per piece: releases the blob URL, then continues.
        const advance = () => {
            if (advanced) return;
            advanced = true;
            if (objectUrl) URL.revokeObjectURL(objectUrl);
            playNext();
        };

        try {
            objectUrl = URL.createObjectURL(base64ToBlob(audioBase64));
            const audio = new Audio(objectUrl);
            audio.onended = advance;
            audio.onerror = advance;
            audio.play().catch(err => {
                console.log('播放异常,自动下一段', err);
                advance();
            });
        } catch (e) {
            // Undecodable audio will not get better by retrying; skip it.
            console.log('Audio piece could not be decoded, skipping', currentIndex, e);
            advance();
        }
    }

    async function checkPiece() {
        attempts++;
        let piece = null;
        try {
            const res = await fetch(url);
            piece = await res.json();
        } catch (e) {
            // Piece not ready yet or a transient network error: retry below.
        }

        if (piece && piece.audio) {
            playPiece(piece.audio);
        } else if (attempts < MAX_ATTEMPTS) {
            setTimeout(checkPiece, POLL_INTERVAL_MS);
        } else {
            console.log('Audio piece not available, skipping', currentIndex);
            playNext();
        }
    }

    checkPiece();
}
| 596 | + | |
| 562 | 597 | async function doMessage(input, message, button) { |
| 563 | 598 | addMessage(message, 'user'); |
| 564 | 599 | showTypingIndicator(); |
| ... | ... | @@ -586,36 +621,11 @@ |
| 586 | 621 | const replyText = (data.processedText || "") + (data.systemText || ""); |
| 587 | 622 | addMessage(replyText, 'ai'); |
| 588 | 623 | |
| 589 | - // ============================================== | |
| 590 | - // 👇 【关键】用 cacheKey 取音频(绝对不串音) | |
| 591 | - // ============================================== | |
| 592 | 624 | const cacheKey = data.cacheKey; |
| 593 | - if (!cacheKey) return; | |
| 594 | - | |
| 595 | - let retry = 0; | |
| 596 | - const checkAudio = async () => { | |
| 597 | - retry++; | |
| 598 | - if (retry > 20) return; | |
| 599 | - | |
| 600 | - try { | |
| 601 | - // ============================================== | |
| 602 | - // 👇 用 cacheKey 获取自己的音频(别人拿不到) | |
| 603 | - // ============================================== | |
| 604 | - const res = await fetch(`${CONFIG.backendUrl}/api/tts/audio?cacheKey=${encodeURIComponent(cacheKey)}`); | |
| 605 | - const audioData = await res.json(); | |
| 606 | - | |
| 607 | - if (audioData.audioBase64) { | |
| 608 | - const blob = base64ToBlob(audioData.audioBase64); | |
| 609 | - const audio = new Audio(URL.createObjectURL(blob)); | |
| 610 | - audio.play().catch(err => console.log('播放异常', err)); | |
| 611 | - } else { | |
| 612 | - setTimeout(checkAudio, 800); | |
| 613 | - } | |
| 614 | - } catch (e) { | |
| 615 | - setTimeout(checkAudio, 800); | |
| 616 | - } | |
| 617 | - }; | |
| 618 | - setTimeout(checkAudio, 1200); | |
| 625 | + const audioSize = data.audioSize; // 总分几段 | |
| 626 | + | |
| 627 | + | |
| 628 | + playByIndex(cacheKey, 0, audioSize); | |
| 619 | 629 | |
| 620 | 630 | } catch (error) { |
| 621 | 631 | console.error('错误:', error); |
| ... | ... | @@ -629,28 +639,6 @@ |
| 629 | 639 | } |
| 630 | 640 | } |
| 631 | 641 | |
| 632 | - // ============================== | |
| 633 | - // 👇 语音排队播放函数(保证顺序) | |
| 634 | - // ============================== | |
| 635 | - function playNextAudio() { | |
| 636 | - if (isPlaying || audioQueue.length === 0) return; | |
| 637 | - | |
| 638 | - isPlaying = true; | |
| 639 | - const base64 = audioQueue.shift(); | |
| 640 | - const blob = base64ToBlob(base64); | |
| 641 | - const audio = new Audio(URL.createObjectURL(blob)); | |
| 642 | - | |
| 643 | - audio.onended = () => { | |
| 644 | - isPlaying = false; | |
| 645 | - playNextAudio(); // 播放下一条 | |
| 646 | - }; | |
| 647 | - | |
| 648 | - audio.play().catch(err => { | |
| 649 | - isPlaying = false; | |
| 650 | - playNextAudio(); | |
| 651 | - }); | |
| 652 | - } | |
| 653 | - | |
| 654 | 642 | function base64ToBlob(base64) { |
| 655 | 643 | const byteCharacters = atob(base64); |
| 656 | 644 | const byteNumbers = new Array(byteCharacters.length); | ... | ... |