// PythonTtsProxyService.java 15.9 KB
package com.xly.tts.service;

import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import com.xly.constant.ReturnTypeCode;
import com.xly.entity.AiResponseDTO;
import com.xly.service.XlyErpService;
import com.xly.tts.bean.*;
import com.xly.util.AdvancedSymbolRemover;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.InputStreamResource;
import org.springframework.http.*;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;

import javax.annotation.PostConstruct;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

@Slf4j
@Service
@RequiredArgsConstructor
public class PythonTtsProxyService {

    // HTTP client used for every call to the Python TTS service. Declared final, so it is
    // supplied through the constructor generated by @RequiredArgsConstructor.
    private final RestTemplate restTemplate;

    // Base URL of the Python TTS service; falls back to http://localhost:8000 when the
    // tts.python.url property is not set.
    @Value("${tts.python.url:http://localhost:8000}")
    private String pythonServiceUrl;

    // Value of tts.python.timeout (default 30000). Not read anywhere else in this class;
    // presumably a millisecond timeout meant for the HTTP client -- TODO confirm and wire up.
    @Value("${tts.python.timeout:30000}")
    private int timeout;

    // Thread pool used by synthesizeStreamAsync; created in init() and stopped in shutdown().
    private ExecutorService executorService;

    // ERP service queried for the AI reply (erpUserInput / initSceneGuide); constructor-injected.
    private final XlyErpService xlyErpService;

    /**
     * Creates the worker pool used for asynchronous synthesis and logs the configured
     * Python service address. Runs once after dependency injection ({@code @PostConstruct}).
     */
    @PostConstruct
    public void init() {
        final int workerCount = 5;
        this.executorService = Executors.newFixedThreadPool(workerCount);
        log.info("Python TTS代理服务初始化完成,Python服务地址: {}", this.pythonServiceUrl);
    }

    /**
     * Returns a fixed "success" acknowledgement; the request is not inspected.
     *
     * @param request incoming TTS request (unused)
     * @return HTTP 200 whose body carries code 200 and message "success"
     */
    public ResponseEntity<TTSResponseDTO> initTool(TTSRequestDTO request) {
        return ResponseEntity.ok(
                TTSResponseDTO.builder().code(200).message("success").build());
    }

    /**
     * Streaming speech synthesis, proxied to the Python service.
     *
     * @param request text, voice and optional rate/volume to synthesize
     * @return whatever {@link #getVoiceResult(TTSRequestDTO)} produces for the request
     */
    public ResponseEntity<InputStreamResource> synthesizeStream(TTSRequestDTO request) {
        final ResponseEntity<InputStreamResource> audioResponse = getVoiceResult(request);
        return audioResponse;
    }

    /**
     * Sends the user's text to the ERP AI service and turns the reply into a TTS response.
     *
     * @param request carries the user text, user id, user type and authorization value
     * @return the response built by {@link #synthesizeStreamAi(TTSRequestDTO, AiResponseDTO)}
     */
    public ResponseEntity<TTSResponseDTO> synthesizeStreamAi(TTSRequestDTO request) {
        // Ask the AI service for the reply that corresponds to the user's input.
        AiResponseDTO aiReply = xlyErpService.erpUserInput(
                request.getText(),
                request.getUserid(),
                request.getUsertype(),
                request.getAuthorization());
        return synthesizeStreamAi(request, aiReply);
    }

    /**
     * Initial-load entry point: fetches the scene guide and returns it as a TTS response.
     *
     * <p>The guide is requested with an empty input string, and its return type is set to
     * {@code ReturnTypeCode.HTML} before the response is built.
     *
     * <p>Original author: 钱豹, 2026/2/8 11:16.
     *
     * @param request carries the user id, user type and authorization value
     * @return the response built by {@link #synthesizeStreamAi(TTSRequestDTO, AiResponseDTO)}
     */
    public ResponseEntity<TTSResponseDTO> init(TTSRequestDTO request) {
        AiResponseDTO sceneGuide = xlyErpService.initSceneGuide(
                StrUtil.EMPTY,
                request.getUserid(),
                request.getUsertype(),
                request.getAuthorization());
        sceneGuide.setSReturnType(ReturnTypeCode.HTML.getCode());
        return synthesizeStreamAi(request, sceneGuide);
    }

    /**
     * Builds the TTS response for an AI reply, attaching synthesized audio when requested.
     *
     * <p>The AI text is passed through {@code AdvancedSymbolRemover.removePunctuationHtml}
     * and then posted to the Python service's {@code /stream-synthesize} endpoint; the
     * returned bytes are embedded in the response as Base64.
     *
     * <p>Synthesis is best-effort: if anything fails, the text-only response is still
     * returned with code 200 so the caller can display the reply. The failure is logged
     * instead of being dropped silently.
     *
     * @param request       original request (text, voice, rate, volume, voiceless flag)
     * @param aiResponseDTO AI reply whose text, system text, scene name, method name and
     *                      return type are copied into the response
     * @return 200 with the text (plus audio when synthesized), or the Python service's
     *         status with an error body when it answers without a usable audio body
     */
    public ResponseEntity<TTSResponseDTO> synthesizeStreamAi(TTSRequestDTO request, AiResponseDTO aiResponseDTO) {
        String aiText = aiResponseDTO.getAiText();
        String systemText = aiResponseDTO.getSystemText();
        if (ObjectUtil.isEmpty(systemText)) {
            systemText = StrUtil.EMPTY;
        }

        // Null-safe lengths. The request text may be absent (e.g. on the initial-load path),
        // and a null aiText must not be counted as the four characters of "null".
        String originalText = request.getText();
        int originalLength = originalText == null ? 0 : originalText.length();
        int replyLength = (aiText == null ? 0 : aiText.length()) + systemText.length();

        // Text actually sent to the synthesizer (the original comment says HTML is removed).
        String voiceTextNew = AdvancedSymbolRemover.removePunctuationHtml(aiText);
        try {
            // Synthesis only runs when the voiceless flag is true and some text remains.
            // Boolean.TRUE.equals treats an unset (null) flag as "no audio" instead of
            // failing on unboxing.
            if (!Boolean.TRUE.equals(request.getVoiceless()) || ObjectUtil.isEmpty(voiceTextNew)) {
                return ResponseEntity.ok(TTSResponseDTO.builder()
                        .code(200)
                        .message("success")
                        .originalText(originalText) // text as sent by the caller
                        .processedText(aiText) // AI reply
                        .systemText(systemText) // system prompt text
                        .voice(request.getVoice())
                        .sSceneName(aiResponseDTO.getSSceneName())
                        .sMethodName(aiResponseDTO.getSMethodName())
                        .sReturnType(aiResponseDTO.getSReturnType())
                        .timestamp(System.currentTimeMillis())
                        .textLength(originalLength)
                        .build());
            }

            // Build the JSON payload for the Python service.
            Map<String, Object> pythonRequest = new HashMap<>();
            pythonRequest.put("text", voiceTextNew);
            pythonRequest.put("voice", request.getVoice());
            pythonRequest.put("rate", request.getRate() != null ? request.getRate() : "+10%");
            pythonRequest.put("volume", request.getVolume() != null ? request.getVolume() : "+0%");

            HttpHeaders headers = new HttpHeaders();
            headers.setContentType(MediaType.APPLICATION_JSON);
            headers.setAccept(Arrays.asList(MediaType.APPLICATION_OCTET_STREAM, MediaType.ALL));
            HttpEntity<Map<String, Object>> entity = new HttpEntity<>(pythonRequest, headers);
            ResponseEntity<byte[]> response = restTemplate.exchange(
                    pythonServiceUrl + "/stream-synthesize",
                    HttpMethod.POST,
                    entity,
                    byte[].class
            );

            byte[] audio = response.getBody();
            if (response.getStatusCode() == HttpStatus.OK && audio != null) {
                // Embed the audio as Base64 so text and audio travel in one JSON body.
                String audioBase64 = Base64.getEncoder().encodeToString(audio);
                TTSResponseDTO ttsResponse = TTSResponseDTO.builder()
                        .code(200)
                        .message("success")
                        .originalText(originalText)
                        .processedText(aiText)
                        .systemText(systemText)
                        .voice(request.getVoice())
                        .timestamp(System.currentTimeMillis())
                        .textLength(replyLength)
                        .audioBase64(audioBase64)
                        .audioSize(audio.length)
                        .sSceneName(aiResponseDTO.getSSceneName())
                        .sMethodName(aiResponseDTO.getSMethodName())
                        .sReturnType(aiResponseDTO.getSReturnType())
                        .audioFormat("audio/mpeg")
                        .build();
                return ResponseEntity.ok(ttsResponse);
            }

            log.warn("Python服务响应失败: {}", response.getStatusCode());
            return ResponseEntity.status(response.getStatusCode())
                    .body(TTSResponseDTO.error("python_service_error", 500,
                            "Python服务响应失败: " + response.getStatusCode()));

        } catch (Exception e) {
            // Deliberate degradation: the reply text is still useful without audio, so the
            // response stays a 200 -- but the cause is recorded for diagnosis.
            log.warn("调用Python服务合成语音失败,仅返回文本响应", e);
            TTSResponseDTO ttsResponse = TTSResponseDTO.builder()
                    .code(200)
                    .message("success")
                    .originalText(originalText)
                    .voice(request.getVoice())
                    .timestamp(System.currentTimeMillis())
                    .processedText(aiText)
                    .systemText(systemText)
                    .textLength(replyLength)
                    .sSceneName(aiResponseDTO.getSSceneName())
                    .sMethodName(aiResponseDTO.getSMethodName())
                    .sReturnType(aiResponseDTO.getSReturnType())
                    .build();
            return ResponseEntity.ok(ttsResponse);
        }
    }

    /**
     * Synthesizes the request text through the Python service and returns the audio bytes.
     *
     * <p>The text is first passed through {@code AdvancedSymbolRemover.removePunctuationHtml},
     * then posted to {@code /stream-synthesize}. Rate and volume default to {@code "+0%"}.
     *
     * @param request text, voice and optional rate/volume
     * @return 200 with an {@code audio/mpeg} body on success; the Python service's status
     *         with no body when it answers without usable audio; otherwise the result of
     *         the degraded fallback path
     */
    public ResponseEntity<InputStreamResource> getVoiceResult(TTSRequestDTO request) {
        try {
            String voiceText = AdvancedSymbolRemover.removePunctuationHtml(request.getText());

            // Build the JSON payload for the Python service.
            Map<String, Object> pythonRequest = new HashMap<>();
            pythonRequest.put("text", voiceText);
            pythonRequest.put("voice", request.getVoice());
            pythonRequest.put("rate", request.getRate() != null ? request.getRate() : "+0%");
            pythonRequest.put("volume", request.getVolume() != null ? request.getVolume() : "+0%");

            HttpHeaders headers = new HttpHeaders();
            headers.setContentType(MediaType.APPLICATION_JSON);
            headers.setAccept(Arrays.asList(MediaType.APPLICATION_OCTET_STREAM, MediaType.ALL));
            HttpEntity<Map<String, Object>> entity = new HttpEntity<>(pythonRequest, headers);
            ResponseEntity<byte[]> response = restTemplate.exchange(
                    pythonServiceUrl + "/stream-synthesize",
                    HttpMethod.POST,
                    entity,
                    byte[].class
            );

            byte[] audio = response.getBody();
            if (response.getStatusCode() == HttpStatus.OK && audio != null) {
                InputStream inputStream = new ByteArrayInputStream(audio);
                InputStreamResource resource = new InputStreamResource(inputStream);

                HttpHeaders responseHeaders = new HttpHeaders();
                responseHeaders.setContentType(MediaType.parseMediaType("audio/mpeg"));
                responseHeaders.setContentLength(audio.length);
                responseHeaders.set("Content-Disposition", "inline; filename=\"speech.mp3\"");
                responseHeaders.set("X-TTS-Source", "python-service");
                responseHeaders.set("X-TTS-Voice", request.getVoice());
                return ResponseEntity.ok()
                        .headers(responseHeaders)
                        .body(resource);
            }

            // Reached only when the service answered but without usable audio.
            log.warn("Python服务未返回可用音频,状态码: {}", response.getStatusCode());
            return ResponseEntity.status(response.getStatusCode()).build();
        } catch (Exception e) {
            // Record why synthesis failed before degrading; previously the cause was lost.
            log.warn("调用Python服务合成语音失败,转入降级响应: {}", e.toString());
            return fallbackResponse(request);
        }
    }

    /**
     * Convenience entry point that synthesizes plain text with a given voice.
     *
     * @param text  text to speak
     * @param voice voice identifier forwarded to the Python service
     * @return the result of {@link #synthesizeStream(TTSRequestDTO)} for a request holding
     *         only these two values
     */
    public ResponseEntity<InputStreamResource> quickSynthesize(String text, String voice) {
        final TTSRequestDTO quickRequest = new TTSRequestDTO();
        quickRequest.setText(text);
        quickRequest.setVoice(voice);
        final ResponseEntity<InputStreamResource> audioResponse = synthesizeStream(quickRequest);
        return audioResponse;
    }

    /**
     * Asynchronous variant of {@link #synthesizeStream(TTSRequestDTO)}.
     *
     * @param request request to synthesize
     * @return a future completed on this service's executor with the synthesis result
     */
    public CompletableFuture<ResponseEntity<InputStreamResource>> synthesizeStreamAsync(TTSRequestDTO request) {
        CompletableFuture<ResponseEntity<InputStreamResource>> pending =
                CompletableFuture.supplyAsync(() -> {
                    return synthesizeStream(request);
                }, executorService);
        return pending;
    }

    /**
     * Fetches the list of available voices from the Python service's {@code /voices} endpoint.
     *
     * <p>The response body is expected to hold a {@code voices} list of objects with
     * {@code name}, {@code locale}, {@code gender} and {@code displayName} entries. Entries
     * that are not objects are skipped. When the call fails, or the body has no
     * {@code voices} list, the built-in default list is returned instead.
     *
     * @return voices reported by the Python service, or the default list as a fallback
     */
    public List<VoiceInfoDTO> getAvailableVoices() {
        try {
            log.info("从Python服务获取语音列表");

            ResponseEntity<Map> response = restTemplate.getForEntity(
                    pythonServiceUrl + "/voices",
                    Map.class
            );

            Map<?, ?> responseBody = response.getBody();
            if (response.getStatusCode() == HttpStatus.OK && responseBody != null) {
                Object voicesData = responseBody.get("voices");
                if (voicesData instanceof List) {
                    List<VoiceInfoDTO> voices = new ArrayList<>();
                    for (Object item : (List<?>) voicesData) {
                        if (!(item instanceof Map)) {
                            // Malformed entry: skip it rather than discarding the whole list.
                            continue;
                        }
                        Map<?, ?> voiceData = (Map<?, ?>) item;
                        VoiceInfoDTO voice = new VoiceInfoDTO();
                        // Objects.toString keeps a missing value as null and tolerates
                        // non-string JSON values without an unchecked cast.
                        voice.setName(Objects.toString(voiceData.get("name"), null));
                        voice.setLocale(Objects.toString(voiceData.get("locale"), null));
                        voice.setGender(Objects.toString(voiceData.get("gender"), null));
                        voice.setDisplayName(Objects.toString(voiceData.get("displayName"), null));
                        voices.add(voice);
                    }

                    log.info("从Python服务获取到 {} 个语音", voices.size());
                    return voices;
                }
                // Handled explicitly instead of via a caught NullPointerException.
                log.warn("Python服务语音列表响应缺少voices列表");
            }
        } catch (Exception e) {
            log.error("获取Python服务语音列表失败: {}", e.getMessage(), e);
        }

        // Degrade to the built-in default voice list.
        return getDefaultVoices();
    }

    /**
     * Looks up a single voice by its name.
     *
     * @param name voice name to match exactly; {@code null} never matches
     * @return the first voice from {@link #getAvailableVoices()} with that name, or
     *         {@code null} when none matches
     */
    public VoiceInfoDTO getVoiceDetail(String name) {
        List<VoiceInfoDTO> voices = getAvailableVoices();
        return voices.stream()
                // A voice built from the Python payload may have no name; guard against
                // it so one incomplete entry cannot fail the whole lookup.
                .filter(v -> v.getName() != null && v.getName().equals(name))
                .findFirst()
                .orElse(null);
    }

    /**
     * Checks whether the Python service reports itself healthy.
     *
     * @return {@code true} only when {@code /health} answers 200 with a body whose
     *         {@code status} entry equals {@code "healthy"}; {@code false} otherwise,
     *         including when the call fails
     */
    public boolean healthCheck() {
        try {
            ResponseEntity<Map> response = restTemplate.getForEntity(
                    pythonServiceUrl + "/health",
                    Map.class
            );

            // An empty body is simply "not healthy"; check it explicitly rather than
            // relying on the catch block to absorb a NullPointerException.
            Map<?, ?> body = response.getBody();
            boolean healthy = response.getStatusCode() == HttpStatus.OK
                    && body != null
                    && "healthy".equals(body.get("status"));

            log.info("Python服务健康状态: {}", healthy ? "健康" : "异常");
            return healthy;

        } catch (Exception e) {
            log.error("Python服务健康检查失败: {}", e.getMessage());
            return false;
        }
    }

    /**
     * Synthesizes several requests one after another.
     *
     * @param requests requests to process, in order
     * @return one synthesis result per request, in the same order
     */
    public List<ResponseEntity<InputStreamResource>> batchSynthesize(List<TTSRequestDTO> requests) {
        final List<ResponseEntity<InputStreamResource>> responses = new ArrayList<>();
        final Iterator<TTSRequestDTO> pending = requests.iterator();
        while (pending.hasNext()) {
            responses.add(synthesizeStream(pending.next()));
        }
        return responses;
    }

    /**
     * Direct synthesis (intended for testing); identical to
     * {@link #synthesizeStream(TTSRequestDTO)}.
     *
     * @param request request to synthesize
     * @return the synthesis result
     */
    public ResponseEntity<InputStreamResource> synthesizeDirect(TTSRequestDTO request) {
        final ResponseEntity<InputStreamResource> directResult = synthesizeStream(request);
        return directResult;
    }

    /**
     * Stops the worker pool created in {@code init()}.
     *
     * <p>Annotated with {@code @PreDestroy} so the container calls it when the bean is
     * destroyed; without that the pool's threads are never released on context shutdown.
     * The annotation is written fully qualified so no import change is required. Calling
     * this method more than once is harmless because
     * {@link ExecutorService#shutdown()} has no additional effect on a pool that is
     * already shut down.
     */
    @javax.annotation.PreDestroy
    public void shutdown() {
        if (executorService != null) {
            executorService.shutdown();
        }
        log.info("Python TTS代理服务已关闭");
    }

    /**
     * Degraded response used when synthesis fails: tries once to synthesize a fixed apology
     * message with a default voice, and answers 503 if that is not possible either.
     *
     * @param request the request whose synthesis just failed
     * @return the synthesized apology audio, or 503 with an {@code X-TTS-Error} header
     */
    private ResponseEntity<InputStreamResource> fallbackResponse(TTSRequestDTO request) {
        final String fallbackText = "对不起,语音合成服务暂时不可用,请稍后重试。";
        final String fallbackVoice = "zh-CN-XiaoxiaoNeural";

        // Termination guard. synthesizeStream delegates to getVoiceResult, which catches
        // every exception and calls back into this method; without this check a failing
        // fallback would recurse until StackOverflowError. If the request that just failed
        // already is the fallback request, give up instead of retrying again.
        boolean fallbackAlreadyFailed = request != null
                && fallbackText.equals(request.getText())
                && fallbackVoice.equals(request.getVoice());

        if (fallbackAlreadyFailed) {
            log.error("降级响应也失败了: {}", "Python服务不可用");
        } else {
            try {
                TTSRequestDTO fallbackRequest = new TTSRequestDTO();
                fallbackRequest.setText(fallbackText);
                fallbackRequest.setVoice(fallbackVoice);
                // A local backup TTS engine could be invoked here instead.
                return synthesizeStream(fallbackRequest);
            } catch (Exception e) {
                log.error("降级响应也失败了: {}", e.getMessage());
            }
        }

        return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE)
                .header("X-TTS-Error", "服务暂时不可用")
                .body(null);
    }

    /**
     * Built-in voice list used when the Python service cannot supply one.
     *
     * @return a fixed-size list of six preset voices (Chinese, English, Japanese, Korean)
     */
    private List<VoiceInfoDTO> getDefaultVoices() {
        final VoiceInfoDTO[] presets = {
                createVoice("zh-CN-XiaoxiaoNeural", "zh-CN", "Female", "晓晓 - 中文女声"),
                createVoice("zh-CN-YunyangNeural", "zh-CN", "Male", "云扬 - 中文男声"),
                createVoice("en-US-JennyNeural", "en-US", "Female", "Jenny - 英文女声"),
                createVoice("en-US-GuyNeural", "en-US", "Male", "Guy - 英文男声"),
                createVoice("ja-JP-NanamiNeural", "ja-JP", "Female", "七海 - 日文女声"),
                createVoice("ko-KR-SunHiNeural", "ko-KR", "Female", "선히 - 韩文女声")
        };
        final List<VoiceInfoDTO> fallbackVoices = Arrays.asList(presets);

        log.warn("使用默认语音列表,共 {} 个语音", fallbackVoices.size());
        return fallbackVoices;
    }

    /**
     * Creates a {@code VoiceInfoDTO} populated with the four given attributes.
     *
     * @param name        voice identifier
     * @param locale      locale tag of the voice
     * @param gender      gender label of the voice
     * @param displayName human-readable name
     * @return the populated DTO
     */
    private VoiceInfoDTO createVoice(String name, String locale, String gender, String displayName) {
        final VoiceInfoDTO info = new VoiceInfoDTO();
        info.setName(name);
        info.setLocale(locale);
        info.setGender(gender);
        info.setDisplayName(displayName);
        return info;
    }
}