修复音频裁剪问题

2025-06-18 17:22:26 +08:00 · 2025-06-18 17:22:26 +08:00 · 1534a0228e
commit 1534a0228e
parent 962b9d785a
1 changed file with 24 additions and 73 deletions
--- a/service/llm_service.go
+++ b/service/llm_service.go
@@ -616,25 +616,12 @@ func (s *LLMService) TrimAudioSilence(audioData string) (string, error) {
 	channels := int(fmtChunk.NumChannels)
 	bytesPerSample := int(fmtChunk.BitsPerSample) / 8
-	// 优化后的静音检测参数
+	// Find the last non-silent sample
-	silenceThreshold := 0.01                                      // 降低静音阈值，更敏感地检测声音
+	lastNonSilent := 0
-	windowSize := int(float64(fmtChunk.SampleRate) * 0.1)         // 50ms滑动窗口
+	silenceThreshold := 0.01 // Adjust this threshold as needed
 	minSilenceDuration := int(float64(fmtChunk.SampleRate) * 0.1) // 100ms最小静音持续时间
 	bufferSamples := int(float64(fmtChunk.SampleRate) * 0.1)      // 减少缓冲区到50ms
-	// 计算RMS能量的辅助函数
+	for i := 0; i < samplesPerChannel; i++ {
-	calculateRMS := func(startSample, endSample int) float64 {
+		isSilent := true
 		if startSample >= endSample || startSample >= samplesPerChannel {
 			return 0.0
 		}
 		if endSample > samplesPerChannel {
 			endSample = samplesPerChannel
 		}
 		var sum float64
 		sampleCount := 0
 		for i := startSample; i < endSample; i++ {
 		for ch := 0; ch < channels; ch++ {
 			offset := i*int(fmtChunk.BlockAlign) + ch*bytesPerSample
 			if offset+bytesPerSample > len(audioBytes) {
@@ -647,62 +634,26 @@ func (s *LLMService) TrimAudioSilence(audioData string) (string, error) {
 				continue
 			}
-				// Normalize sample to [-1, 1] range and square it
+			// Normalize sample to [-1, 1] range
 			normalizedSample := float64(sample) / 32768.0
-				sum += normalizedSample * normalizedSample
+			if math.Abs(normalizedSample) > silenceThreshold {
-				sampleCount++
+				isSilent = false
 			}
 		}
 		if sampleCount == 0 {
 			return 0.0
 		}
 		return math.Sqrt(sum / float64(sampleCount))
 	}
 	// 使用滑动窗口检测静音
 	lastNonSilent := 0
 	consecutiveSilentWindows := 0
 	requiredSilentWindows := minSilenceDuration / windowSize
 	for i := 0; i < samplesPerChannel; i += windowSize {
 		endSample := i + windowSize
 		if endSample > samplesPerChannel {
 			endSample = samplesPerChannel
 		}
 		rms := calculateRMS(i, endSample)
 		if rms > silenceThreshold {
 			// 检测到声音
 			lastNonSilent = endSample
 			consecutiveSilentWindows = 0
 		} else {
 			// 检测到静音
 			consecutiveSilentWindows++
 			// 如果连续静音窗口足够多，认为找到了真正的静音开始点
 			if consecutiveSilentWindows >= requiredSilentWindows {
 				lastNonSilent = i
 				break
 			}
 		}
 		if !isSilent {
 			lastNonSilent = i
 		}
 	}
-	// 添加缓冲区，但减少缓冲区大小
+	// Add a small buffer (e.g., 0.1 seconds) after the last non-silent sample
 	bufferSamples := int(float64(fmtChunk.SampleRate) * 0.1)
 	lastSample := lastNonSilent + bufferSamples
 	if lastSample > samplesPerChannel {
 		lastSample = samplesPerChannel
 	}
 	// 确保至少保留一些音频数据
 	if lastSample < int(fmtChunk.SampleRate)*2 { // 至少保留2秒
 		lastSample = int(fmtChunk.SampleRate) * 2
 		if lastSample > samplesPerChannel {
 			lastSample = samplesPerChannel
 		}
 	}
 	// Calculate new data size
 	newDataSize := lastSample * int(fmtChunk.BlockAlign)
 	trimmedAudio := audioBytes[:newDataSize]