package com.xly.tts.service; import cn.hutool.core.util.ObjectUtil; import cn.hutool.core.util.StrUtil; import com.xly.constant.ReturnTypeCode; import com.xly.entity.AiResponseDTO; import com.xly.service.UserSceneSessionService; import com.xly.service.XlyErpService; import com.xly.tts.bean.*; import com.xly.util.AdvancedSymbolRemover; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Value; import org.springframework.core.io.InputStreamResource; import org.springframework.http.*; import org.springframework.stereotype.Service; import org.springframework.web.client.RestTemplate; import javax.annotation.PostConstruct; import java.io.ByteArrayInputStream; import java.io.InputStream; import java.util.*; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @Slf4j @Service @RequiredArgsConstructor public class PythonTtsProxyService { private final RestTemplate restTemplate; @Value("${tts.python.url:http://localhost:8000}") private String pythonServiceUrl; @Value("${tts.python.timeout:30000}") private int timeout; private ExecutorService executorService; private final XlyErpService xlyErpService; private final UserSceneSessionService userSceneSessionService; @PostConstruct public void init() { executorService = Executors.newFixedThreadPool(5); log.info("Python TTS代理服务初始化完成,Python服务地址: {}", pythonServiceUrl); } public ResponseEntity initTool(TTSRequestDTO request) { TTSResponseDTO ttsResponse = TTSResponseDTO.builder() .code(200) .message("success") .build(); return ResponseEntity.ok(ttsResponse); } /** * 流式合成语音 - 代理到Python服务 */ public ResponseEntity synthesizeStream(TTSRequestDTO request) { return getVoiceResult(request); } /** * 流式合成语音 - 代理到Python服务 */ public ResponseEntity synthesizeStreamAi(TTSRequestDTO request) { //调用AI返回请求内容 String userInput = request.getText(); String sUserId = request.getUserid(); String sUserType = request.getUsertype(); String authorization = request.getAuthorization(); AiResponseDTO voiceText = xlyErpService.erpUserInput(userInput,sUserId,sUserType, authorization); return synthesizeStreamAi(request,voiceText); } /*** * @Author 钱豹 * @Date 11:16 2026/2/8 * @Param [request] * @return org.springframework.http.ResponseEntity * @Description 初始化加载方法 **/ public ResponseEntity init(TTSRequestDTO request) { //调用AI返回请求内容 String sUserId = request.getUserid(); String sUserType = request.getUsertype(); String authorization = request.getAuthorization(); //清空记忆 userSceneSessionService.cleanUserSession(sUserId); // xlyErpService.initSceneGuide(sUserId,sUserType,StrUtil.EMPTY) AiResponseDTO voiceText = xlyErpService.initSceneGuide(StrUtil.EMPTY,sUserId,sUserType, authorization); voiceText.setSReturnType(ReturnTypeCode.HTML.getCode()); return synthesizeStreamAi(request,voiceText); } public ResponseEntity synthesizeStreamAi(TTSRequestDTO request,AiResponseDTO aiResponseDTO) { String aiText = aiResponseDTO.getAiText(); String systemText = aiResponseDTO.getSystemText(); if(ObjectUtil.isEmpty(systemText)){ systemText = StrUtil.EMPTY; } //移除html String voiceTextNew = AdvancedSymbolRemover.removePunctuationHtml(aiText); try { //如果没有语音直接返回 if(!request.getVoiceless() || ObjectUtil.isEmpty(voiceTextNew)){ return ResponseEntity.ok(TTSResponseDTO.builder() .code(200) .message("success") .originalText(request.getText()) // 原始文本 .processedText(aiText) // AI提示语 .systemText(systemText) // 系统提示语言 .voice(request.getVoice()) .sSceneName(aiResponseDTO.getSSceneName()) .sMethodName (aiResponseDTO.getSMethodName()) .sReturnType (aiResponseDTO.getSReturnType()) .timestamp(System.currentTimeMillis()) .textLength(request.getText().length()) .build()); } // 构建Python服务请求 Map pythonRequest = new HashMap<>(); pythonRequest.put("text", voiceTextNew); pythonRequest.put("voice", request.getVoice()); pythonRequest.put("rate", request.getRate() != null ? request.getRate() : "+10%"); pythonRequest.put("volume", request.getVolume() != null ? request.getVolume() : "+0%"); // 发送请求到Python服务 HttpHeaders headers = new HttpHeaders(); headers.setContentType(MediaType.APPLICATION_JSON); headers.setAccept(Arrays.asList(MediaType.APPLICATION_OCTET_STREAM, MediaType.ALL)); HttpEntity> entity = new HttpEntity<>(pythonRequest, headers); ResponseEntity response = restTemplate.exchange( pythonServiceUrl + "/stream-synthesize", HttpMethod.POST, entity, byte[].class ); if (response.getStatusCode() == HttpStatus.OK && response.getBody() != null) { // 将音频数据转为Base64 String audioBase64 = Base64.getEncoder().encodeToString(response.getBody()); // 构建完整的响应DTO TTSResponseDTO ttsResponse = TTSResponseDTO.builder() .code(200) .message("success") .originalText(request.getText()) // 原始文本 .processedText(aiText) // AI提示语 .systemText(systemText) // 系统提示语言 .voice(request.getVoice()) .timestamp(System.currentTimeMillis()) .textLength((aiText+systemText).length()) .audioBase64(audioBase64) // Base64编码的音频 .audioSize(response.getBody().length) .sSceneName(aiResponseDTO.getSSceneName()) .sMethodName (aiResponseDTO.getSMethodName()) .sReturnType (aiResponseDTO.getSReturnType()) .audioFormat("audio/mpeg") .build(); return ResponseEntity.ok(ttsResponse); } else { return ResponseEntity.status(response.getStatusCode()) .body(TTSResponseDTO.error("python_service_error", 500, "Python服务响应失败: " + response.getStatusCode())); } } catch (Exception e) { // e.printStackTrace(); TTSResponseDTO ttsResponse = TTSResponseDTO.builder() .code(200) .message("success") .originalText(request.getText()) // 原始文本 .voice(request.getVoice()) .timestamp(System.currentTimeMillis()) .processedText(aiText) // AI提示语 .systemText(systemText) // 系统提示语言 .textLength((aiText+systemText).length()) .sSceneName(aiResponseDTO.getSSceneName()) .sMethodName (aiResponseDTO.getSMethodName()) .sReturnType (aiResponseDTO.getSReturnType()) .build(); return ResponseEntity.ok(ttsResponse); } } public ResponseEntity getVoiceResult(TTSRequestDTO request) { try { String voiceText = request.getText(); //移除html voiceText = AdvancedSymbolRemover.removePunctuationHtml( voiceText); // 构建Python服务请求 Map pythonRequest = new HashMap<>(); pythonRequest.put("text", voiceText); pythonRequest.put("voice", request.getVoice()); pythonRequest.put("rate", request.getRate() != null ? request.getRate() : "+0%"); pythonRequest.put("volume", request.getVolume() != null ? request.getVolume() : "+0%"); // 发送请求到Python服务 HttpHeaders headers = new HttpHeaders(); headers.setContentType(MediaType.APPLICATION_JSON); headers.setAccept(Arrays.asList(MediaType.APPLICATION_OCTET_STREAM, MediaType.ALL)); HttpEntity> entity = new HttpEntity<>(pythonRequest, headers); ResponseEntity response = restTemplate.exchange( pythonServiceUrl + "/stream-synthesize", HttpMethod.POST, entity, byte[].class ); if (response.getStatusCode() == HttpStatus.OK && response.getBody() != null) { InputStream inputStream = new ByteArrayInputStream(response.getBody()); InputStreamResource resource = new InputStreamResource(inputStream); // 构建响应头 HttpHeaders responseHeaders = new HttpHeaders(); responseHeaders.setContentType(MediaType.parseMediaType("audio/mpeg")); responseHeaders.setContentLength(response.getBody().length); responseHeaders.set("Content-Disposition", "inline; filename=\"speech.mp3\""); responseHeaders.set("X-TTS-Source", "python-service"); responseHeaders.set("X-TTS-Voice", request.getVoice()); return ResponseEntity.ok() .headers(responseHeaders) .body(resource); } else { return ResponseEntity.status(response.getStatusCode()).build(); } } catch (Exception e) { return fallbackResponse(request); } } /** * 快速合成接口 */ public ResponseEntity quickSynthesize(String text, String voice) { TTSRequestDTO request = new TTSRequestDTO(); request.setText(text); request.setVoice(voice); return synthesizeStream(request); } /** * 异步流式合成 */ public CompletableFuture> synthesizeStreamAsync(TTSRequestDTO request) { return CompletableFuture.supplyAsync(() -> synthesizeStream(request), executorService); } /** * 获取可用语音列表 */ public List getAvailableVoices() { try { log.info("从Python服务获取语音列表"); ResponseEntity response = restTemplate.getForEntity( pythonServiceUrl + "/voices", Map.class ); if (response.getStatusCode() == HttpStatus.OK && response.getBody() != null) { Map responseBody = response.getBody(); List> voicesData = (List>) responseBody.get("voices"); List voices = new ArrayList<>(); for (Map voiceData : voicesData) { VoiceInfoDTO voice = new VoiceInfoDTO(); voice.setName(voiceData.get("name")); voice.setLocale(voiceData.get("locale")); voice.setGender(voiceData.get("gender")); voice.setDisplayName(voiceData.get("displayName")); voices.add(voice); } log.info("从Python服务获取到 {} 个语音", voices.size()); return voices; } } catch (Exception e) { log.error("获取Python服务语音列表失败: {}", e.getMessage()); } // 返回默认语音列表作为降级 return getDefaultVoices(); } /** * 获取语音详情 */ public VoiceInfoDTO getVoiceDetail(String name) { List voices = getAvailableVoices(); return voices.stream() .filter(v -> v.getName().equals(name)) .findFirst() .orElse(null); } /** * 健康检查 */ public boolean healthCheck() { try { ResponseEntity response = restTemplate.getForEntity( pythonServiceUrl + "/health", Map.class ); boolean healthy = response.getStatusCode() == HttpStatus.OK && "healthy".equals(response.getBody().get("status")); log.info("Python服务健康状态: {}", healthy ? "健康" : "异常"); return healthy; } catch (Exception e) { log.error("Python服务健康检查失败: {}", e.getMessage()); return false; } } /** * 批量合成 */ public List> batchSynthesize(List requests) { List> results = new ArrayList<>(); for (TTSRequestDTO request : requests) { results.add(synthesizeStream(request)); } return results; } /** * 直接合成(用于测试) */ public ResponseEntity synthesizeDirect(TTSRequestDTO request) { return synthesizeStream(request); } /** * 关闭服务 */ public void shutdown() { if (executorService != null) { executorService.shutdown(); } log.info("Python TTS代理服务已关闭"); } /** * 降级响应 */ private ResponseEntity fallbackResponse(TTSRequestDTO request) { try { // 可以返回一个默认的音频文件 String fallbackText = "对不起,语音合成服务暂时不可用,请稍后重试。"; TTSRequestDTO fallbackRequest = new TTSRequestDTO(); fallbackRequest.setText(fallbackText); fallbackRequest.setVoice("zh-CN-XiaoxiaoNeural"); // 这里可以调用本地备用的TTS服务 return synthesizeStream(fallbackRequest); } catch (Exception e) { log.error("降级响应也失败了: {}", e.getMessage()); return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE) .header("X-TTS-Error", "服务暂时不可用") .body(null); } } /** * 获取默认语音列表 */ private List getDefaultVoices() { List defaultVoices = Arrays.asList( createVoice("zh-CN-XiaoxiaoNeural", "zh-CN", "Female", "晓晓 - 中文女声"), createVoice("zh-CN-YunyangNeural", "zh-CN", "Male", "云扬 - 中文男声"), createVoice("en-US-JennyNeural", "en-US", "Female", "Jenny - 英文女声"), createVoice("en-US-GuyNeural", "en-US", "Male", "Guy - 英文男声"), createVoice("ja-JP-NanamiNeural", "ja-JP", "Female", "七海 - 日文女声"), createVoice("ko-KR-SunHiNeural", "ko-KR", "Female", "선히 - 韩文女声") ); log.warn("使用默认语音列表,共 {} 个语音", defaultVoices.size()); return defaultVoices; } private VoiceInfoDTO createVoice(String name, String locale, String gender, String displayName) { VoiceInfoDTO voice = new VoiceInfoDTO(); voice.setName(name); voice.setLocale(locale); voice.setGender(gender); voice.setDisplayName(displayName); return voice; } }