// PythonTtsProxyService.java (13.7 KB)
package com.xly.tts.service;

import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import com.xly.constant.BusinessCode;
import com.xly.constant.ReturnTypeCode;
import com.xly.entity.AiResponseDTO;
import com.xly.service.UserSceneSessionService;
import com.xly.service.XlyErpService;
import com.xly.tts.bean.*;
import com.xly.util.AdvancedSymbolRemover;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.InputStreamResource;
import org.springframework.http.*;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;

import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Proxies text-to-speech requests to an external Python HTTP service and combines
 * them with the ERP AI dialogue service.
 *
 * <p>Audio is fetched from {@code {pythonServiceUrl}/stream-synthesize}; the voice list and
 * health status come from {@code /voices} and {@code /health} on the same base URL.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class PythonTtsProxyService {

    /** Volume sent to the Python service when the request does not specify one. */
    private static final String DEFAULT_VOLUME = "+0%";

    /** Path of the synthesis endpoint, relative to {@link #pythonServiceUrl}. */
    private static final String SYNTHESIZE_PATH = "/stream-synthesize";

    private final RestTemplate restTemplate;

    @Value("${tts.python.url:http://localhost:8000}")
    private String pythonServiceUrl;

    // NOTE(review): this value is not referenced anywhere in this class; the injected
    // RestTemplate is used as-is. Confirm the timeout is applied where the RestTemplate is built.
    @Value("${tts.python.timeout:30000}")
    private int timeout;

    /** Pool used for background synthesis; created in {@link #init()}. */
    private ExecutorService executorService;

    private final XlyErpService xlyErpService;
    private final UserSceneSessionService userSceneSessionService;

    /** Creates the worker pool once dependency injection has completed. */
    @PostConstruct
    public void init() {
        executorService = Executors.newFixedThreadPool(5);
        log.info("Python TTS代理服务初始化完成,Python服务地址: {}", pythonServiceUrl);
    }

    /**
     * Returns a bare success response; no work is performed.
     *
     * @param request ignored
     * @return HTTP 200 with code 200 and message {@code "success"}
     */
    public ResponseEntity<TTSResponseDTO> initTool(TTSRequestDTO request) {
        TTSResponseDTO ttsResponse = TTSResponseDTO.builder()
                .code(200)
                .message("success")
                .build();
        return ResponseEntity.ok(ttsResponse);
    }

    /**
     * Synthesizes speech by proxying to the Python service.
     *
     * @param request text, voice and optional rate/volume
     * @return the audio response produced by {@link #getVoiceResult(TTSRequestDTO)}
     */
    public ResponseEntity<InputStreamResource> synthesizeStream(TTSRequestDTO request) {
        return getVoiceResult(request);
    }

    /**
     * Sends the user's text to the ERP AI service, then builds the TTS response for the answer.
     * The return type is part of the existing contract and must not change.
     *
     * @param request user input plus user/tenant identification
     * @return the response built by {@link #synthesizeStreamAi(TTSRequestDTO, AiResponseDTO)}
     */
    public ResponseEntity<TTSResponseDTO> synthesizeStreamAi(TTSRequestDTO request) {
        AiResponseDTO voiceText = xlyErpService.erpUserInput(
                request.getText(),
                request.getUserid(),
                request.getUsername(),
                request.getBrandsid(),
                request.getSubsidiaryid(),
                request.getUsertype(),
                request.getAuthorization());
        return synthesizeStreamAi(request, voiceText);
    }

    /**
     * Asks the ERP service to clear the user's memory and echoes its answer without audio.
     *
     * @param request user/tenant identification; {@code text} may be absent
     * @return HTTP 200 carrying the ERP service's texts and scene metadata
     */
    public ResponseEntity<TTSResponseDTO> cleanMemory(TTSRequestDTO request) {
        AiResponseDTO aiResponseDTO = xlyErpService.cleanMemory(
                request.getUserid(),
                request.getUsername(),
                request.getBrandsid(),
                request.getSubsidiaryid(),
                request.getUsertype(),
                request.getAuthorization());
        return ResponseEntity.ok(TTSResponseDTO.builder()
                .code(200)
                .message("success")
                .originalText(request.getText())
                .processedText(aiResponseDTO.getAiText())
                .systemText(aiResponseDTO.getSystemText())
                .voice(request.getVoice())
                .sSceneName(aiResponseDTO.getSSceneName())
                .sMethodName(aiResponseDTO.getSMethodName())
                .sReturnType(aiResponseDTO.getSReturnType())
                .timestamp(System.currentTimeMillis())
                // Text is not required for this operation, so tolerate its absence.
                .textLength(lengthOf(request.getText()))
                .build());
    }

    /**
     * Clears the user's scene session and returns the initial scene guide, marked as HTML.
     * The return type is part of the existing contract and must not change.
     *
     * @param request user/tenant identification
     * @return the response built by {@link #synthesizeStreamAi(TTSRequestDTO, AiResponseDTO)}
     */
    public ResponseEntity<TTSResponseDTO> init(TTSRequestDTO request) {
        String sUserId = request.getUserid();

        // Drop any previous conversation state before producing the guide.
        userSceneSessionService.cleanUserSession(sUserId);
        AiResponseDTO voiceText = xlyErpService.initSceneGuide(
                StrUtil.EMPTY,
                sUserId,
                request.getUsername(),
                request.getBrandsid(),
                request.getSubsidiaryid(),
                request.getUsertype(),
                request.getAuthorization());
        voiceText.setSReturnType(ReturnTypeCode.HTML.getCode());
        return synthesizeStreamAi(request, voiceText);
    }

    /**
     * Builds the response for an AI answer and, when applicable, starts background synthesis
     * whose result is stored in {@code LocalAudioCache} under the returned cache key.
     * The return type is part of the existing contract and must not change.
     *
     * @param request       original request (voice, rate, volume, voiceless flag)
     * @param aiResponseDTO AI answer to present and speak
     * @return HTTP 200 with the textual answer; {@code cacheKey} is set only when synthesis
     *         was started
     */
    public ResponseEntity<TTSResponseDTO> synthesizeStreamAi(TTSRequestDTO request, AiResponseDTO aiResponseDTO) {
        String aiText = aiResponseDTO.getAiText();
        String systemText = aiResponseDTO.getSystemText();
        if (ObjectUtil.isEmpty(systemText)) {
            systemText = StrUtil.EMPTY;
        }
        String voiceTextNew = AdvancedSymbolRemover.removePunctuationHtml(aiText);

        TTSResponseDTO dto = TTSResponseDTO.builder()
                .code(200)
                .message("success")
                .originalText(request.getText())
                .processedText(aiText)
                .systemText(systemText)
                .voice(request.getVoice())
                .sSceneName(aiResponseDTO.getSSceneName())
                .sMethodName(aiResponseDTO.getSMethodName())
                .sReturnType(aiResponseDTO.getSReturnType())
                .sCommonts(BusinessCode.COMMONTS.getMessage())
                .timestamp(System.currentTimeMillis())
                // Sum the lengths separately: concatenating a null aiText would count "null".
                .textLength(lengthOf(aiText) + systemText.length())
                .build();

        // NOTE(review): synthesis is skipped unless the request sets voiceless=true, which reads
        // as the opposite of what the flag name suggests. Behaviour is kept as-is; confirm the
        // intended semantics with the front end before changing this condition.
        boolean voiceless = Boolean.TRUE.equals(request.getVoiceless());
        if (!voiceless || ObjectUtil.isEmpty(voiceTextNew)) {
            return ResponseEntity.ok(dto);
        }

        // Opaque key the front end uses to fetch the audio later. A random UUID keeps it unique
        // across users and concurrent requests and avoids embedding arbitrary user text.
        String cacheKey = request.getUserid() + "_" + System.currentTimeMillis() + "_" + UUID.randomUUID();

        // Set before the task is submitted so the worker thread is guaranteed to see it.
        dto.setCacheKey(cacheKey);

        CompletableFuture.runAsync(() -> {
            try {
                HttpHeaders headers = new HttpHeaders();
                headers.setContentType(MediaType.APPLICATION_JSON);
                headers.setAccept(Collections.singletonList(MediaType.APPLICATION_OCTET_STREAM));
                HttpEntity<Map<String, Object>> entity =
                        new HttpEntity<>(buildSynthesisParams(voiceTextNew, request, "+10%"), headers);

                ResponseEntity<byte[]> response = restTemplate.exchange(
                        pythonServiceUrl + SYNTHESIZE_PATH,
                        HttpMethod.POST, entity, byte[].class
                );

                byte[] audio = response.getBody();
                if (response.getStatusCode().is2xxSuccessful() && audio != null) {
                    dto.setAudioBase64(Base64.getEncoder().encodeToString(audio));
                    dto.setAudioSize(audio.length);
                    dto.setAudioFormat("audio/mpeg");

                    // Stored under the per-request key so other users' entries are not overwritten.
                    LocalAudioCache.put(cacheKey, dto);
                }
            } catch (Exception e) {
                // Best effort: the textual answer has already been returned, so only log.
                log.warn("语音合成忽略:{}", e.getMessage(), e);
            }
        }, executorService);

        return ResponseEntity.ok(dto);
    }

    /**
     * Requests audio from the Python service and wraps it as an {@code audio/mpeg} response.
     * If the request throws, a single fallback announcement is attempted instead.
     *
     * @param request text, voice and optional rate/volume
     * @return HTTP 200 with audio; HTTP 500 when the service answered without usable audio;
     *         HTTP 503 when both the request and the fallback failed
     */
    public ResponseEntity<InputStreamResource> getVoiceResult(TTSRequestDTO request) {
        try {
            return requestAudio(request);
        } catch (Exception e) {
            log.warn("Speech synthesis failed, trying fallback announcement: {}", e.getMessage(), e);
            return fallbackResponse(request);
        }
    }

    /**
     * Convenience overload that synthesizes {@code text} with {@code voice} and default rate/volume.
     *
     * @param text  text to speak
     * @param voice voice name understood by the Python service
     * @return the audio response
     */
    public ResponseEntity<InputStreamResource> quickSynthesize(String text, String voice) {
        TTSRequestDTO request = new TTSRequestDTO();
        request.setText(text);
        request.setVoice(voice);
        return synthesizeStream(request);
    }

    /**
     * Runs {@link #synthesizeStream(TTSRequestDTO)} on this service's worker pool.
     *
     * @param request text, voice and optional rate/volume
     * @return a future completing with the audio response
     */
    public CompletableFuture<ResponseEntity<InputStreamResource>> synthesizeStreamAsync(TTSRequestDTO request) {
        return CompletableFuture.supplyAsync(() -> synthesizeStream(request), executorService);
    }

    /**
     * Fetches the voice list from {@code /voices}.
     *
     * @return the voices reported by the Python service, or the built-in defaults when the
     *         call fails or the response carries no {@code voices} list
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public List<VoiceInfoDTO> getAvailableVoices() {
        try {
            ResponseEntity<Map> response = restTemplate.getForEntity(pythonServiceUrl + "/voices", Map.class);
            Map body = response.getBody();
            if (response.getStatusCode() == HttpStatus.OK && body != null) {
                Object rawVoices = body.get("voices");
                if (rawVoices instanceof List) {
                    List<VoiceInfoDTO> voices = new ArrayList<>();
                    // Element shape is trusted from the Python service; a mismatch raises
                    // ClassCastException and falls through to the defaults below.
                    for (Map<String, String> vd : (List<Map<String, String>>) rawVoices) {
                        voices.add(createVoice(
                                vd.get("name"), vd.get("locale"), vd.get("gender"), vd.get("displayName")));
                    }
                    return voices;
                }
                log.warn("Voice list response has no usable 'voices' entry; using defaults");
            }
        } catch (Exception e) {
            log.error("获取语音列表失败", e);
        }
        return getDefaultVoices();
    }

    /**
     * Looks up a voice by exact name.
     *
     * @param name voice name to find
     * @return the matching voice, or {@code null} when none matches
     */
    public VoiceInfoDTO getVoiceDetail(String name) {
        return getAvailableVoices().stream()
                .filter(v -> name != null && name.equals(v.getName()))
                .findFirst()
                .orElse(null);
    }

    /**
     * Checks the Python service's {@code /health} endpoint.
     *
     * @return {@code true} only for HTTP 200 with {@code status} equal to {@code "healthy"}
     */
    @SuppressWarnings("rawtypes")
    public boolean healthCheck() {
        try {
            ResponseEntity<Map> res = restTemplate.getForEntity(pythonServiceUrl + "/health", Map.class);
            Map body = res.getBody();
            return res.getStatusCode() == HttpStatus.OK
                    && body != null
                    && "healthy".equals(body.get("status"));
        } catch (Exception e) {
            return false;
        }
    }

    /**
     * Synthesizes each request in order, one after another.
     *
     * @param requests requests to process; {@code null} is treated as empty
     * @return one response per request, in the same order
     */
    public List<ResponseEntity<InputStreamResource>> batchSynthesize(List<TTSRequestDTO> requests) {
        List<ResponseEntity<InputStreamResource>> list = new ArrayList<>();
        if (requests == null) {
            return list;
        }
        for (TTSRequestDTO req : requests) {
            list.add(synthesizeStream(req));
        }
        return list;
    }

    /**
     * Alias of {@link #synthesizeStream(TTSRequestDTO)}.
     *
     * @param request text, voice and optional rate/volume
     * @return the audio response
     */
    public ResponseEntity<InputStreamResource> synthesizeDirect(TTSRequestDTO request) {
        return synthesizeStream(request);
    }

    /** Stops accepting background work; invoked on bean destruction and safe to call repeatedly. */
    @PreDestroy
    public void shutdown() {
        if (executorService != null) {
            executorService.shutdown();
        }
        log.info("Python TTS服务已关闭");
    }

    /**
     * Performs one synthesis call against the Python service, letting exceptions propagate.
     *
     * @return HTTP 200 with audio, or HTTP 500 when the reply was not 200 or had no body
     */
    private ResponseEntity<InputStreamResource> requestAudio(TTSRequestDTO request) {
        String voiceText = AdvancedSymbolRemover.removePunctuationHtml(request.getText());

        HttpHeaders headers = new HttpHeaders();
        headers.setContentType(MediaType.APPLICATION_JSON);
        headers.setAccept(Arrays.asList(MediaType.APPLICATION_OCTET_STREAM, MediaType.ALL));
        HttpEntity<Map<String, Object>> entity =
                new HttpEntity<>(buildSynthesisParams(voiceText, request, "+0%"), headers);

        ResponseEntity<byte[]> response = restTemplate.exchange(
                pythonServiceUrl + SYNTHESIZE_PATH,
                HttpMethod.POST,
                entity,
                byte[].class
        );

        byte[] audio = response.getBody();
        if (response.getStatusCode() != HttpStatus.OK || audio == null) {
            return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).build();
        }

        InputStream inputStream = new ByteArrayInputStream(audio);
        HttpHeaders responseHeaders = new HttpHeaders();
        responseHeaders.setContentType(MediaType.parseMediaType("audio/mpeg"));
        responseHeaders.setContentLength(audio.length);
        responseHeaders.set("Content-Disposition", "inline; filename=\"speech.mp3\"");
        return ResponseEntity.ok().headers(responseHeaders).body(new InputStreamResource(inputStream));
    }

    /** Builds the JSON payload for the synthesis endpoint, applying rate/volume defaults. */
    private Map<String, Object> buildSynthesisParams(String text, TTSRequestDTO request, String defaultRate) {
        Map<String, Object> params = new HashMap<>();
        params.put("text", text);
        params.put("voice", request.getVoice());
        params.put("rate", request.getRate() != null ? request.getRate() : defaultRate);
        params.put("volume", request.getVolume() != null ? request.getVolume() : DEFAULT_VOLUME);
        return params;
    }

    /**
     * Tries once to synthesize a fixed "service unavailable" announcement.
     *
     * <p>This calls {@link #requestAudio(TTSRequestDTO)} directly rather than the public entry
     * point: going through {@link #getVoiceResult(TTSRequestDTO)} would re-enter this method on
     * every failure and recurse without bound while the Python service is down.
     *
     * @param request the request that failed (not used for the announcement)
     * @return the announcement audio, or HTTP 503 when that attempt fails as well
     */
    private ResponseEntity<InputStreamResource> fallbackResponse(TTSRequestDTO request) {
        try {
            TTSRequestDTO req = new TTSRequestDTO();
            req.setText("服务暂时不可用");
            req.setVoice("zh-CN-XiaoxiaoNeural");
            return requestAudio(req);
        } catch (Exception e) {
            log.warn("Fallback announcement could not be synthesized: {}", e.getMessage(), e);
            return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE).build();
        }
    }

    /** Voices offered when the Python service cannot supply its own list. */
    private List<VoiceInfoDTO> getDefaultVoices() {
        return Arrays.asList(
                createVoice("zh-CN-XiaoxiaoNeural", "zh-CN", "Female", "晓晓"),
                createVoice("zh-CN-YunyangNeural", "zh-CN", "Male", "云扬")
        );
    }

    /** Creates a populated {@link VoiceInfoDTO}. */
    private VoiceInfoDTO createVoice(String name, String locale, String gender, String displayName) {
        VoiceInfoDTO v = new VoiceInfoDTO();
        v.setName(name);
        v.setLocale(locale);
        v.setGender(gender);
        v.setDisplayName(displayName);
        return v;
    }

    /** Returns the length of {@code s}, treating {@code null} as empty. */
    private static int lengthOf(String s) {
        return s == null ? 0 : s.length();
    }
}