From 1534a0228ed75c30278b5415cf37c9fd9ec25265 Mon Sep 17 00:00:00 2001 From: Song367 <601337784@qq.com> Date: Wed, 18 Jun 2025 17:22:26 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E9=9F=B3=E9=A2=91=E8=A3=81?= =?UTF-8?q?=E5=89=AA=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- service/llm_service.go | 97 +++++++++++------------------------------- 1 file changed, 24 insertions(+), 73 deletions(-) diff --git a/service/llm_service.go b/service/llm_service.go index 4449008..43e787b 100644 --- a/service/llm_service.go +++ b/service/llm_service.go @@ -616,93 +616,44 @@ func (s *LLMService) TrimAudioSilence(audioData string) (string, error) { channels := int(fmtChunk.NumChannels) bytesPerSample := int(fmtChunk.BitsPerSample) / 8 - // 优化后的静音检测参数 - silenceThreshold := 0.01 // 降低静音阈值,更敏感地检测声音 - windowSize := int(float64(fmtChunk.SampleRate) * 0.1) // 50ms滑动窗口 - minSilenceDuration := int(float64(fmtChunk.SampleRate) * 0.1) // 100ms最小静音持续时间 - bufferSamples := int(float64(fmtChunk.SampleRate) * 0.1) // 减少缓冲区到50ms - - // 计算RMS能量的辅助函数 - calculateRMS := func(startSample, endSample int) float64 { - if startSample >= endSample || startSample >= samplesPerChannel { - return 0.0 - } - if endSample > samplesPerChannel { - endSample = samplesPerChannel - } - - var sum float64 - sampleCount := 0 - - for i := startSample; i < endSample; i++ { - for ch := 0; ch < channels; ch++ { - offset := i*int(fmtChunk.BlockAlign) + ch*bytesPerSample - if offset+bytesPerSample > len(audioBytes) { - continue - } - - // Convert bytes to sample value - var sample int16 - if err := binary.Read(bytes.NewReader(audioBytes[offset:offset+bytesPerSample]), binary.LittleEndian, &sample); err != nil { - continue - } - - // Normalize sample to [-1, 1] range and square it - normalizedSample := float64(sample) / 32768.0 - sum += normalizedSample * normalizedSample - sampleCount++ - } - } - - if sampleCount == 0 { - return 0.0 - } - return math.Sqrt(sum / float64(sampleCount)) - } - - // 使用滑动窗口检测静音 + // Find the last non-silent sample lastNonSilent := 0 - consecutiveSilentWindows := 0 - requiredSilentWindows := minSilenceDuration / windowSize + silenceThreshold := 0.01 // Adjust this threshold as needed - for i := 0; i < samplesPerChannel; i += windowSize { - endSample := i + windowSize - if endSample > samplesPerChannel { - endSample = samplesPerChannel - } + for i := 0; i < samplesPerChannel; i++ { + isSilent := true + for ch := 0; ch < channels; ch++ { + offset := i*int(fmtChunk.BlockAlign) + ch*bytesPerSample + if offset+bytesPerSample > len(audioBytes) { + continue + } - rms := calculateRMS(i, endSample) + // Convert bytes to sample value + var sample int16 + if err := binary.Read(bytes.NewReader(audioBytes[offset:offset+bytesPerSample]), binary.LittleEndian, &sample); err != nil { + continue + } - if rms > silenceThreshold { - // 检测到声音 - lastNonSilent = endSample - consecutiveSilentWindows = 0 - } else { - // 检测到静音 - consecutiveSilentWindows++ - - // 如果连续静音窗口足够多,认为找到了真正的静音开始点 - if consecutiveSilentWindows >= requiredSilentWindows { - lastNonSilent = i + // Normalize sample to [-1, 1] range + normalizedSample := float64(sample) / 32768.0 + if math.Abs(normalizedSample) > silenceThreshold { + isSilent = false break } } + + if !isSilent { + lastNonSilent = i + } } - // 添加缓冲区,但减少缓冲区大小 + // Add a small buffer (e.g., 0.1 seconds) after the last non-silent sample + bufferSamples := int(float64(fmtChunk.SampleRate) * 0.1) lastSample := lastNonSilent + bufferSamples if lastSample > samplesPerChannel { lastSample = samplesPerChannel } - // 确保至少保留一些音频数据 - if lastSample < int(fmtChunk.SampleRate)*2 { // 至少保留2秒 - lastSample = int(fmtChunk.SampleRate) * 2 - if lastSample > samplesPerChannel { - lastSample = samplesPerChannel - } - } - // Calculate new data size newDataSize := lastSample * int(fmtChunk.BlockAlign) trimmedAudio := audioBytes[:newDataSize]