修改音频阈值
All checks were successful
Gitea Actions Demo / Explore-Gitea-Actions (push) Successful in 52s

This commit is contained in:
Song367 2025-06-18 16:23:42 +08:00
parent 112a7dd70d
commit 0c708da80d

View File

@ -616,44 +616,93 @@ func (s *LLMService) TrimAudioSilence(audioData string) (string, error) {
channels := int(fmtChunk.NumChannels) channels := int(fmtChunk.NumChannels)
bytesPerSample := int(fmtChunk.BitsPerSample) / 8 bytesPerSample := int(fmtChunk.BitsPerSample) / 8
// Find the last non-silent sample // 优化后的静音检测参数
// Silence-detection tuning. The three sizes below are sample counts derived
// from the stream's sample rate.
silenceThreshold := 0.005 // Lowered threshold on the normalized RMS level, so quieter sound still counts as non-silent.
windowSize := int(float64(fmtChunk.SampleRate) * 0.05) // Sliding-window length: 50 ms of samples.
minSilenceDuration := int(float64(fmtChunk.SampleRate) * 0.1) // Minimum silence duration: 100 ms of samples.
bufferSamples := int(float64(fmtChunk.SampleRate) * 0.05) // Padding kept after the last non-silent sample, reduced to 50 ms.
// calculateRMS returns the root-mean-square level, normalized to [0, 1], of
// the sample frames in [startSample, endSample) taken across all channels.
//
// endSample is clamped to samplesPerChannel. The result is 0 when the range
// is empty, starts at or beyond the end of the audio, or contains no
// decodable sample.
//
// NOTE(review): every sample is decoded as a signed 16-bit little-endian value
// taken from the first two bytes of its slot, regardless of
// fmtChunk.BitsPerSample. Confirm that only 16-bit PCM reaches this code.
calculateRMS := func(startSample, endSample int) float64 {
	if startSample >= endSample || startSample >= samplesPerChannel {
		return 0.0
	}
	if endSample > samplesPerChannel {
		endSample = samplesPerChannel
	}

	const int16Size = 2
	if bytesPerSample < int16Size {
		// A slot narrower than two bytes cannot hold an int16, so no sample
		// in the range is decodable.
		return 0.0
	}

	// Loop-invariant: frame stride in bytes.
	blockAlign := int(fmtChunk.BlockAlign)

	var sum float64
	sampleCount := 0
	for i := startSample; i < endSample; i++ {
		for ch := 0; ch < channels; ch++ {
			offset := i*blockAlign + ch*bytesPerSample
			if offset+bytesPerSample > len(audioBytes) {
				// Slot extends past the available data; ignore it.
				continue
			}

			// Decode straight from the slice instead of building a reader per
			// sample: this loop visits every sample of the audio.
			sample := int16(binary.LittleEndian.Uint16(audioBytes[offset : offset+int16Size]))

			// Normalize to [-1, 1) and accumulate the squared amplitude.
			normalized := float64(sample) / 32768.0
			sum += normalized * normalized
			sampleCount++
		}
	}

	if sampleCount == 0 {
		return 0.0
	}
	return math.Sqrt(sum / float64(sampleCount))
}
// 使用滑动窗口检测静音
lastNonSilent := 0 lastNonSilent := 0
silenceThreshold := 0.01 // Adjust this threshold as needed consecutiveSilentWindows := 0
requiredSilentWindows := minSilenceDuration / windowSize
for i := 0; i < samplesPerChannel; i++ { for i := 0; i < samplesPerChannel; i += windowSize {
isSilent := true endSample := i + windowSize
for ch := 0; ch < channels; ch++ { if endSample > samplesPerChannel {
offset := i*int(fmtChunk.BlockAlign) + ch*bytesPerSample endSample = samplesPerChannel
if offset+bytesPerSample > len(audioBytes) { }
continue
}
// Convert bytes to sample value rms := calculateRMS(i, endSample)
var sample int16
if err := binary.Read(bytes.NewReader(audioBytes[offset:offset+bytesPerSample]), binary.LittleEndian, &sample); err != nil {
continue
}
// Normalize sample to [-1, 1] range if rms > silenceThreshold {
normalizedSample := float64(sample) / 32768.0 // 检测到声音
if math.Abs(normalizedSample) > silenceThreshold { lastNonSilent = endSample
isSilent = false consecutiveSilentWindows = 0
} else {
// 检测到静音
consecutiveSilentWindows++
// 如果连续静音窗口足够多,认为找到了真正的静音开始点
if consecutiveSilentWindows >= requiredSilentWindows {
lastNonSilent = i
break break
} }
} }
if !isSilent {
lastNonSilent = i
}
} }
// Add a small buffer (e.g., 0.1 seconds) after the last non-silent sample // 添加缓冲区,但减少缓冲区大小
bufferSamples := int(float64(fmtChunk.SampleRate) * 0.1)
lastSample := lastNonSilent + bufferSamples lastSample := lastNonSilent + bufferSamples
if lastSample > samplesPerChannel { if lastSample > samplesPerChannel {
lastSample = samplesPerChannel lastSample = samplesPerChannel
} }
// Make sure a minimum amount of audio is always retained: never cut below
// two seconds' worth of samples, or the whole clip when it is shorter than that.
if lastSample < int(fmtChunk.SampleRate)*2 { // Keep at least 2 seconds.
lastSample = int(fmtChunk.SampleRate) * 2
if lastSample > samplesPerChannel {
lastSample = samplesPerChannel
}
}
// Calculate new data size // Calculate new data size
newDataSize := lastSample * int(fmtChunk.BlockAlign) newDataSize := lastSample * int(fmtChunk.BlockAlign)
trimmedAudio := audioBytes[:newDataSize] trimmedAudio := audioBytes[:newDataSize]