修改音频阈值
All checks were successful
Gitea Actions Demo / Explore-Gitea-Actions (push) Successful in 52s
All checks were successful
Gitea Actions Demo / Explore-Gitea-Actions (push) Successful in 52s
This commit is contained in:
parent
112a7dd70d
commit
0c708da80d
@ -616,44 +616,93 @@ func (s *LLMService) TrimAudioSilence(audioData string) (string, error) {
|
||||
channels := int(fmtChunk.NumChannels)
|
||||
bytesPerSample := int(fmtChunk.BitsPerSample) / 8
|
||||
|
||||
// Find the last non-silent sample
|
||||
// 优化后的静音检测参数
|
||||
silenceThreshold := 0.005 // 降低静音阈值,更敏感地检测声音
|
||||
windowSize := int(float64(fmtChunk.SampleRate) * 0.05) // 50ms滑动窗口
|
||||
minSilenceDuration := int(float64(fmtChunk.SampleRate) * 0.1) // 100ms最小静音持续时间
|
||||
bufferSamples := int(float64(fmtChunk.SampleRate) * 0.05) // 减少缓冲区到50ms
|
||||
|
||||
// 计算RMS能量的辅助函数
|
||||
// calculateRMS returns the root-mean-square energy of the frames in
// [startSample, endSample), pooled across all channels, with each sample
// normalized to the [-1, 1] range. endSample is clamped to samplesPerChannel.
// It returns 0 when the range is empty, when it starts at or past the end of
// the audio, or when no sample in the range could be decoded.
//
// Each sample is decoded as a little-endian signed 16-bit value taken from the
// first two bytes of its bytesPerSample-wide slot.
// NOTE(review): for sample widths other than 16 bits this is not the true
// amplitude — confirm the caller only feeds 16-bit PCM.
calculateRMS := func(startSample, endSample int) float64 {
	if startSample >= endSample || startSample >= samplesPerChannel {
		return 0.0
	}
	if endSample > samplesPerChannel {
		endSample = samplesPerChannel
	}

	// Width in bytes of the int16 value read out of every sample slot.
	const int16Size = 2
	// Loop-invariant frame stride, converted once instead of per sample.
	blockAlign := int(fmtChunk.BlockAlign)

	var sum float64
	sampleCount := 0

	for i := startSample; i < endSample; i++ {
		frameOffset := i * blockAlign
		for ch := 0; ch < channels; ch++ {
			offset := frameOffset + ch*bytesPerSample
			// Skip slots that are too narrow to hold an int16 or that run
			// past the end of the buffer (truncated data).
			if bytesPerSample < int16Size || offset+bytesPerSample > len(audioBytes) {
				continue
			}

			// Decode straight from the byte slice; this avoids allocating a
			// reader and a scratch buffer for every single sample.
			sample := int16(binary.LittleEndian.Uint16(audioBytes[offset : offset+int16Size]))

			// Normalize to [-1, 1] and accumulate the squared amplitude.
			normalizedSample := float64(sample) / 32768.0
			sum += normalizedSample * normalizedSample
			sampleCount++
		}
	}

	if sampleCount == 0 {
		return 0.0
	}
	return math.Sqrt(sum / float64(sampleCount))
}
|
||||
|
||||
// 使用滑动窗口检测静音
|
||||
lastNonSilent := 0
|
||||
silenceThreshold := 0.01 // Adjust this threshold as needed
|
||||
consecutiveSilentWindows := 0
|
||||
requiredSilentWindows := minSilenceDuration / windowSize
|
||||
|
||||
for i := 0; i < samplesPerChannel; i++ {
|
||||
isSilent := true
|
||||
for ch := 0; ch < channels; ch++ {
|
||||
offset := i*int(fmtChunk.BlockAlign) + ch*bytesPerSample
|
||||
if offset+bytesPerSample > len(audioBytes) {
|
||||
continue
|
||||
}
|
||||
for i := 0; i < samplesPerChannel; i += windowSize {
|
||||
endSample := i + windowSize
|
||||
if endSample > samplesPerChannel {
|
||||
endSample = samplesPerChannel
|
||||
}
|
||||
|
||||
// Convert bytes to sample value
|
||||
var sample int16
|
||||
if err := binary.Read(bytes.NewReader(audioBytes[offset:offset+bytesPerSample]), binary.LittleEndian, &sample); err != nil {
|
||||
continue
|
||||
}
|
||||
rms := calculateRMS(i, endSample)
|
||||
|
||||
// Normalize sample to [-1, 1] range
|
||||
normalizedSample := float64(sample) / 32768.0
|
||||
if math.Abs(normalizedSample) > silenceThreshold {
|
||||
isSilent = false
|
||||
if rms > silenceThreshold {
|
||||
// 检测到声音
|
||||
lastNonSilent = endSample
|
||||
consecutiveSilentWindows = 0
|
||||
} else {
|
||||
// 检测到静音
|
||||
consecutiveSilentWindows++
|
||||
|
||||
// 如果连续静音窗口足够多,认为找到了真正的静音开始点
|
||||
if consecutiveSilentWindows >= requiredSilentWindows {
|
||||
lastNonSilent = i
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !isSilent {
|
||||
lastNonSilent = i
|
||||
}
|
||||
}
|
||||
|
||||
// Add a small buffer (e.g., 0.1 seconds) after the last non-silent sample
|
||||
bufferSamples := int(float64(fmtChunk.SampleRate) * 0.1)
|
||||
// 添加缓冲区,但减少缓冲区大小
|
||||
lastSample := lastNonSilent + bufferSamples
|
||||
if lastSample > samplesPerChannel {
|
||||
lastSample = samplesPerChannel
|
||||
}
|
||||
|
||||
// 确保至少保留一些音频数据
|
||||
if lastSample < int(fmtChunk.SampleRate)*2 { // 至少保留2秒
|
||||
lastSample = int(fmtChunk.SampleRate) * 2
|
||||
if lastSample > samplesPerChannel {
|
||||
lastSample = samplesPerChannel
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate new data size
|
||||
newDataSize := lastSample * int(fmtChunk.BlockAlign)
|
||||
trimmedAudio := audioBytes[:newDataSize]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user