修复音频裁剪问题
All checks were successful
Gitea Actions Demo / Explore-Gitea-Actions (push) Successful in 3s

This commit is contained in:
Song367 2025-06-18 17:22:26 +08:00
parent 962b9d785a
commit 1534a0228e

View File

@ -616,93 +616,44 @@ func (s *LLMService) TrimAudioSilence(audioData string) (string, error) {
channels := int(fmtChunk.NumChannels)
bytesPerSample := int(fmtChunk.BitsPerSample) / 8
// 优化后的静音检测参数
silenceThreshold := 0.01 // 降低静音阈值,更敏感地检测声音
windowSize := int(float64(fmtChunk.SampleRate) * 0.1) // 100 ms sliding window (0.1 s worth of samples)
minSilenceDuration := int(float64(fmtChunk.SampleRate) * 0.1) // 100ms最小静音持续时间
bufferSamples := int(float64(fmtChunk.SampleRate) * 0.1) // 100 ms trailing buffer (0.1 s worth of samples)
// 计算RMS能量的辅助函数
calculateRMS := func(startSample, endSample int) float64 {
if startSample >= endSample || startSample >= samplesPerChannel {
return 0.0
}
if endSample > samplesPerChannel {
endSample = samplesPerChannel
}
var sum float64
sampleCount := 0
for i := startSample; i < endSample; i++ {
for ch := 0; ch < channels; ch++ {
offset := i*int(fmtChunk.BlockAlign) + ch*bytesPerSample
if offset+bytesPerSample > len(audioBytes) {
continue
}
// Convert bytes to sample value
var sample int16
if err := binary.Read(bytes.NewReader(audioBytes[offset:offset+bytesPerSample]), binary.LittleEndian, &sample); err != nil {
continue
}
// Normalize sample to [-1, 1] range and square it
normalizedSample := float64(sample) / 32768.0
sum += normalizedSample * normalizedSample
sampleCount++
}
}
if sampleCount == 0 {
return 0.0
}
return math.Sqrt(sum / float64(sampleCount))
}
// 使用滑动窗口检测静音
// Find the last non-silent sample
lastNonSilent := 0
consecutiveSilentWindows := 0
requiredSilentWindows := minSilenceDuration / windowSize
silenceThreshold := 0.01 // Adjust this threshold as needed
for i := 0; i < samplesPerChannel; i += windowSize {
endSample := i + windowSize
if endSample > samplesPerChannel {
endSample = samplesPerChannel
}
for i := 0; i < samplesPerChannel; i++ {
isSilent := true
for ch := 0; ch < channels; ch++ {
offset := i*int(fmtChunk.BlockAlign) + ch*bytesPerSample
if offset+bytesPerSample > len(audioBytes) {
continue
}
rms := calculateRMS(i, endSample)
// Convert bytes to sample value
var sample int16
if err := binary.Read(bytes.NewReader(audioBytes[offset:offset+bytesPerSample]), binary.LittleEndian, &sample); err != nil {
continue
}
if rms > silenceThreshold {
// 检测到声音
lastNonSilent = endSample
consecutiveSilentWindows = 0
} else {
// 检测到静音
consecutiveSilentWindows++
// 如果连续静音窗口足够多,认为找到了真正的静音开始点
if consecutiveSilentWindows >= requiredSilentWindows {
lastNonSilent = i
// Normalize sample to [-1, 1] range
normalizedSample := float64(sample) / 32768.0
if math.Abs(normalizedSample) > silenceThreshold {
isSilent = false
break
}
}
if !isSilent {
lastNonSilent = i
}
}
// 添加缓冲区,但减少缓冲区大小
// Add a small buffer (e.g., 0.1 seconds) after the last non-silent sample
bufferSamples := int(float64(fmtChunk.SampleRate) * 0.1)
lastSample := lastNonSilent + bufferSamples
if lastSample > samplesPerChannel {
lastSample = samplesPerChannel
}
// 确保至少保留一些音频数据
if lastSample < int(fmtChunk.SampleRate)*2 { // 至少保留2秒
lastSample = int(fmtChunk.SampleRate) * 2
if lastSample > samplesPerChannel {
lastSample = samplesPerChannel
}
}
// Calculate new data size
newDataSize := lastSample * int(fmtChunk.BlockAlign)
trimmedAudio := audioBytes[:newDataSize]