From 0c708da80d645b166ede77f4fd27f285ed44c1c1 Mon Sep 17 00:00:00 2001 From: Song367 <601337784@qq.com> Date: Wed, 18 Jun 2025 16:23:42 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E9=9F=B3=E9=A2=91=E9=98=88?= =?UTF-8?q?=E5=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- service/llm_service.go | 97 +++++++++++++++++++++++++++++++----------- 1 file changed, 73 insertions(+), 24 deletions(-) diff --git a/service/llm_service.go b/service/llm_service.go index 43e787b..e340bde 100644 --- a/service/llm_service.go +++ b/service/llm_service.go @@ -616,44 +616,93 @@ func (s *LLMService) TrimAudioSilence(audioData string) (string, error) { channels := int(fmtChunk.NumChannels) bytesPerSample := int(fmtChunk.BitsPerSample) / 8 - // Find the last non-silent sample + // 优化后的静音检测参数 + silenceThreshold := 0.005 // 降低静音阈值,更敏感地检测声音 + windowSize := int(float64(fmtChunk.SampleRate) * 0.05) // 50ms滑动窗口 + minSilenceDuration := int(float64(fmtChunk.SampleRate) * 0.1) // 100ms最小静音持续时间 + bufferSamples := int(float64(fmtChunk.SampleRate) * 0.05) // 减少缓冲区到50ms + + // 计算RMS能量的辅助函数 + calculateRMS := func(startSample, endSample int) float64 { + if startSample >= endSample || startSample >= samplesPerChannel { + return 0.0 + } + if endSample > samplesPerChannel { + endSample = samplesPerChannel + } + + var sum float64 + sampleCount := 0 + + for i := startSample; i < endSample; i++ { + for ch := 0; ch < channels; ch++ { + offset := i*int(fmtChunk.BlockAlign) + ch*bytesPerSample + if offset+bytesPerSample > len(audioBytes) { + continue + } + + // Convert bytes to sample value + var sample int16 + if err := binary.Read(bytes.NewReader(audioBytes[offset:offset+bytesPerSample]), binary.LittleEndian, &sample); err != nil { + continue + } + + // Normalize sample to [-1, 1] range and square it + normalizedSample := float64(sample) / 32768.0 + sum += normalizedSample * normalizedSample + sampleCount++ + } + } + + if sampleCount == 0 { + return 0.0 + } + return math.Sqrt(sum / float64(sampleCount)) + } + + // 使用滑动窗口检测静音 lastNonSilent := 0 - silenceThreshold := 0.01 // Adjust this threshold as needed + consecutiveSilentWindows := 0 + requiredSilentWindows := minSilenceDuration / windowSize - for i := 0; i < samplesPerChannel; i++ { - isSilent := true - for ch := 0; ch < channels; ch++ { - offset := i*int(fmtChunk.BlockAlign) + ch*bytesPerSample - if offset+bytesPerSample > len(audioBytes) { - continue - } + for i := 0; i < samplesPerChannel; i += windowSize { + endSample := i + windowSize + if endSample > samplesPerChannel { + endSample = samplesPerChannel + } - // Convert bytes to sample value - var sample int16 - if err := binary.Read(bytes.NewReader(audioBytes[offset:offset+bytesPerSample]), binary.LittleEndian, &sample); err != nil { - continue - } + rms := calculateRMS(i, endSample) - // Normalize sample to [-1, 1] range - normalizedSample := float64(sample) / 32768.0 - if math.Abs(normalizedSample) > silenceThreshold { - isSilent = false + if rms > silenceThreshold { + // 检测到声音 + lastNonSilent = endSample + consecutiveSilentWindows = 0 + } else { + // 检测到静音 + consecutiveSilentWindows++ + + // 如果连续静音窗口足够多,认为找到了真正的静音开始点 + if consecutiveSilentWindows >= requiredSilentWindows { + lastNonSilent = i break } } - - if !isSilent { - lastNonSilent = i - } } - // Add a small buffer (e.g., 0.1 seconds) after the last non-silent sample - bufferSamples := int(float64(fmtChunk.SampleRate) * 0.1) + // 添加缓冲区,但减少缓冲区大小 lastSample := lastNonSilent + bufferSamples if lastSample > samplesPerChannel { lastSample = samplesPerChannel } + // 确保至少保留一些音频数据 + if lastSample < int(fmtChunk.SampleRate)*2 { // 至少保留2秒 + lastSample = int(fmtChunk.SampleRate) * 2 + if lastSample > samplesPerChannel { + lastSample = samplesPerChannel + } + } + // Calculate new data size newDataSize := lastSample * int(fmtChunk.BlockAlign) trimmedAudio := audioBytes[:newDataSize]