package com.xly.tts.service; import cn.hutool.core.util.ObjectUtil; import cn.hutool.core.util.StrUtil; import com.xly.constant.BusinessCode; import com.xly.constant.ReturnTypeCode; import com.xly.entity.AiResponseDTO; import com.xly.service.UserSceneSessionService; import com.xly.service.XlyErpService; import com.xly.tts.bean.*; import com.xly.util.AdvancedSymbolRemover; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Value; import org.springframework.core.io.InputStreamResource; import org.springframework.http.*; import org.springframework.stereotype.Service; import org.springframework.web.client.RestTemplate; import javax.annotation.PostConstruct; import java.io.ByteArrayInputStream; import java.io.InputStream; import java.util.*; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.net.URL; import java.net.HttpURLConnection; import java.io.OutputStream; import java.io.InputStream; @Slf4j @Service @RequiredArgsConstructor public class PythonTtsProxyService { private final RestTemplate restTemplate; @Value("${tts.python.url:http://localhost:8000}") private String pythonServiceUrl; @Value("${tts.python.timeout:30000}") private int timeout; private ExecutorService executorService; private final XlyErpService xlyErpService; private final UserSceneSessionService userSceneSessionService; @PostConstruct public void init() { executorService = Executors.newFixedThreadPool(5); log.info("Python TTS代理服务初始化完成,Python服务地址: {}", pythonServiceUrl); } public ResponseEntity initTool(TTSRequestDTO request) { TTSResponseDTO ttsResponse = TTSResponseDTO.builder() .code(200) .message("success") .build(); return ResponseEntity.ok(ttsResponse); } /** * 流式合成语音 - 代理到Python服务 */ public ResponseEntity synthesizeStream(TTSRequestDTO request) { return getVoiceResult(request); } /** * 【保持原有返回类型】AI对话 + 流式TTS */ public ResponseEntity synthesizeStreamAi(TTSRequestDTO request) { String userInput = request.getText(); String sUserId = request.getUserid(); String sUserName = request.getUsername(); String sBrandsId = request.getBrandsid(); String sSubsidiaryId = request.getSubsidiaryid(); String sUserType = request.getUsertype(); String authorization = request.getAuthorization(); AiResponseDTO voiceText = xlyErpService.erpUserInput(userInput, sUserId, sUserName, sBrandsId, sSubsidiaryId, sUserType, authorization); return synthesizeStreamAi(request, voiceText); } public ResponseEntity cleanMemory(TTSRequestDTO request) { String sUserId = request.getUserid(); String sUserName = request.getUsername(); String sBrandsId = request.getBrandsid(); String sSubsidiaryId = request.getSubsidiaryid(); String sUserType = request.getUsertype(); String authorization = request.getAuthorization(); AiResponseDTO aiResponseDTO = xlyErpService.cleanMemory(sUserId, sUserName, sBrandsId, sSubsidiaryId, sUserType, authorization); return ResponseEntity.ok(TTSResponseDTO.builder() .code(200) .message("success") .originalText(request.getText()) .processedText(aiResponseDTO.getAiText()) .systemText(aiResponseDTO.getSystemText()) .voice(request.getVoice()) .sSceneName(aiResponseDTO.getSSceneName()) .sMethodName(aiResponseDTO.getSMethodName()) .sReturnType(aiResponseDTO.getSReturnType()) .timestamp(System.currentTimeMillis()) .textLength(request.getText().length()) .build()); } /** * 【保持原有返回类型】不动!!! */ public ResponseEntity init(TTSRequestDTO request) { String sUserId = request.getUserid(); String sUserName = request.getUsername(); String sBrandsId = request.getBrandsid(); String sSubsidiaryId = request.getSubsidiaryid(); String sUserType = request.getUsertype(); String authorization = request.getAuthorization(); // 清空记忆 userSceneSessionService.cleanUserSession(sUserId); AiResponseDTO voiceText = xlyErpService.initSceneGuide(StrUtil.EMPTY, sUserId, sUserName, sBrandsId, sSubsidiaryId, sUserType, authorization); voiceText.setSReturnType(ReturnTypeCode.HTML.getCode()); return synthesizeStreamAi(request, voiceText); } /** * 【保持原有返回类型】不动!内部流式请求Python */ public ResponseEntity synthesizeStreamAi(TTSRequestDTO request, AiResponseDTO aiResponseDTO) { String aiText = aiResponseDTO.getAiText(); String systemText = aiResponseDTO.getSystemText(); if (ObjectUtil.isEmpty(systemText)) { systemText = StrUtil.EMPTY; } String voiceTextNew = AdvancedSymbolRemover.removePunctuationHtml(aiText); TTSResponseDTO dto = TTSResponseDTO.builder() .code(200) .message("success") .originalText(request.getText()) .processedText(aiText) .systemText(systemText) .voice(request.getVoice()) .sSceneName(aiResponseDTO.getSSceneName()) .sMethodName(aiResponseDTO.getSMethodName()) .sReturnType(aiResponseDTO.getSReturnType()) .sCommonts(BusinessCode.COMMONTS.getMessage()) .timestamp(System.currentTimeMillis()) .textLength((aiText + systemText).length()) .build(); boolean voiceless = Boolean.TRUE.equals(request.getVoiceless()); if (!voiceless || ObjectUtil.isEmpty(voiceTextNew)) { return ResponseEntity.ok(dto); } // ============================================== // 👇 【关键】生成 全局唯一的 key(多用户不冲突) // ============================================== String cacheKey = request.getUserid() + "_" + System.currentTimeMillis() + "_" + request.getText(); CompletableFuture.runAsync(() -> { try { Map params = new HashMap<>(); params.put("text", voiceTextNew); params.put("voice", request.getVoice()); params.put("rate", request.getRate() != null ? request.getRate() : "+10%"); params.put("volume", request.getVolume() != null ? request.getVolume() : "+0%"); HttpHeaders headers = new HttpHeaders(); headers.setContentType(MediaType.APPLICATION_JSON); headers.setAccept(Collections.singletonList(MediaType.APPLICATION_OCTET_STREAM)); HttpEntity> entity = new HttpEntity<>(params, headers); ResponseEntity response = restTemplate.exchange( pythonServiceUrl + "/stream-synthesize", HttpMethod.POST, entity, byte[].class ); if (response.getStatusCode().is2xxSuccessful() && response.getBody() != null) { dto.setAudioBase64(Base64.getEncoder().encodeToString(response.getBody())); dto.setAudioSize(response.getBody().length); dto.setAudioFormat("audio/mpeg"); // ============================================== // 👇 用唯一key存(不覆盖别人) // ============================================== LocalAudioCache.put(cacheKey, dto); } } catch (Exception e) { log.warn("语音合成忽略:{}", e.getMessage()); } }, executorService); // ============================================== // 👇 把 cacheKey 返回给前端(前端靠它取音频) // ============================================== dto.setCacheKey(cacheKey); return ResponseEntity.ok(dto); } public ResponseEntity getVoiceResult(TTSRequestDTO request) { try { String voiceText = AdvancedSymbolRemover.removePunctuationHtml(request.getText()); Map pythonRequest = new HashMap<>(); pythonRequest.put("text", voiceText); pythonRequest.put("voice", request.getVoice()); pythonRequest.put("rate", request.getRate() != null ? request.getRate() : "+0%"); pythonRequest.put("volume", request.getVolume() != null ? request.getVolume() : "+0%"); HttpHeaders headers = new HttpHeaders(); headers.setContentType(MediaType.APPLICATION_JSON); headers.setAccept(Arrays.asList(MediaType.APPLICATION_OCTET_STREAM, MediaType.ALL)); HttpEntity> entity = new HttpEntity<>(pythonRequest, headers); ResponseEntity response = restTemplate.exchange( pythonServiceUrl + "/stream-synthesize", HttpMethod.POST, entity, byte[].class ); if (response.getStatusCode() == HttpStatus.OK && response.getBody() != null) { InputStream inputStream = new ByteArrayInputStream(response.getBody()); InputStreamResource resource = new InputStreamResource(inputStream); HttpHeaders responseHeaders = new HttpHeaders(); responseHeaders.setContentType(MediaType.parseMediaType("audio/mpeg")); responseHeaders.setContentLength(response.getBody().length); responseHeaders.set("Content-Disposition", "inline; filename=\"speech.mp3\""); return ResponseEntity.ok().headers(responseHeaders).body(resource); } } catch (Exception e) { return fallbackResponse(request); } return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).build(); } public ResponseEntity quickSynthesize(String text, String voice) { TTSRequestDTO request = new TTSRequestDTO(); request.setText(text); request.setVoice(voice); return synthesizeStream(request); } public CompletableFuture> synthesizeStreamAsync(TTSRequestDTO request) { return CompletableFuture.supplyAsync(() -> synthesizeStream(request), executorService); } public List getAvailableVoices() { try { ResponseEntity response = restTemplate.getForEntity(pythonServiceUrl + "/voices", Map.class); if (response.getStatusCode() == HttpStatus.OK && response.getBody() != null) { List> voicesData = (List>) response.getBody().get("voices"); List voices = new ArrayList<>(); for (Map vd : voicesData) { VoiceInfoDTO vo = new VoiceInfoDTO(); vo.setName(vd.get("name")); vo.setLocale(vd.get("locale")); vo.setGender(vd.get("gender")); vo.setDisplayName(vd.get("displayName")); voices.add(vo); } return voices; } } catch (Exception e) { log.error("获取语音列表失败", e); } return getDefaultVoices(); } public VoiceInfoDTO getVoiceDetail(String name) { return getAvailableVoices().stream().filter(v -> v.getName().equals(name)).findFirst().orElse(null); } public boolean healthCheck() { try { ResponseEntity res = restTemplate.getForEntity(pythonServiceUrl + "/health", Map.class); return res.getStatusCode() == HttpStatus.OK && "healthy".equals(res.getBody().get("status")); } catch (Exception e) { return false; } } public List> batchSynthesize(List requests) { List> list = new ArrayList<>(); for (TTSRequestDTO req : requests) list.add(synthesizeStream(req)); return list; } public ResponseEntity synthesizeDirect(TTSRequestDTO request) { return synthesizeStream(request); } public void shutdown() { if (executorService != null) executorService.shutdown(); log.info("Python TTS服务已关闭"); } private ResponseEntity fallbackResponse(TTSRequestDTO request) { try { TTSRequestDTO req = new TTSRequestDTO(); req.setText("服务暂时不可用"); req.setVoice("zh-CN-XiaoxiaoNeural"); return synthesizeStream(req); } catch (Exception e) { return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE).body(null); } } private List getDefaultVoices() { return Arrays.asList( createVoice("zh-CN-XiaoxiaoNeural", "zh-CN", "Female", "晓晓"), createVoice("zh-CN-YunyangNeural", "zh-CN", "Male", "云扬") ); } private VoiceInfoDTO createVoice(String name, String locale, String gender, String displayName) { VoiceInfoDTO v = new VoiceInfoDTO(); v.setName(name); v.setLocale(locale); v.setGender(gender); v.setDisplayName(displayName); return v; } }