修改音频阈值
All checks were successful
Gitea Actions Demo / Explore-Gitea-Actions (push) Successful in 52s
All checks were successful
Gitea Actions Demo / Explore-Gitea-Actions (push) Successful in 52s
This commit is contained in:
parent
112a7dd70d
commit
0c708da80d
@ -616,44 +616,93 @@ func (s *LLMService) TrimAudioSilence(audioData string) (string, error) {
|
|||||||
channels := int(fmtChunk.NumChannels)
|
channels := int(fmtChunk.NumChannels)
|
||||||
bytesPerSample := int(fmtChunk.BitsPerSample) / 8
|
bytesPerSample := int(fmtChunk.BitsPerSample) / 8
|
||||||
|
|
||||||
// Find the last non-silent sample
|
// 优化后的静音检测参数
|
||||||
|
silenceThreshold := 0.005 // 降低静音阈值,更敏感地检测声音
|
||||||
|
windowSize := int(float64(fmtChunk.SampleRate) * 0.05) // 50ms滑动窗口
|
||||||
|
minSilenceDuration := int(float64(fmtChunk.SampleRate) * 0.1) // 100ms最小静音持续时间
|
||||||
|
bufferSamples := int(float64(fmtChunk.SampleRate) * 0.05) // 减少缓冲区到50ms
|
||||||
|
|
||||||
|
// 计算RMS能量的辅助函数
|
||||||
|
calculateRMS := func(startSample, endSample int) float64 {
|
||||||
|
if startSample >= endSample || startSample >= samplesPerChannel {
|
||||||
|
return 0.0
|
||||||
|
}
|
||||||
|
if endSample > samplesPerChannel {
|
||||||
|
endSample = samplesPerChannel
|
||||||
|
}
|
||||||
|
|
||||||
|
var sum float64
|
||||||
|
sampleCount := 0
|
||||||
|
|
||||||
|
for i := startSample; i < endSample; i++ {
|
||||||
|
for ch := 0; ch < channels; ch++ {
|
||||||
|
offset := i*int(fmtChunk.BlockAlign) + ch*bytesPerSample
|
||||||
|
if offset+bytesPerSample > len(audioBytes) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert bytes to sample value
|
||||||
|
var sample int16
|
||||||
|
if err := binary.Read(bytes.NewReader(audioBytes[offset:offset+bytesPerSample]), binary.LittleEndian, &sample); err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normalize sample to [-1, 1] range and square it
|
||||||
|
normalizedSample := float64(sample) / 32768.0
|
||||||
|
sum += normalizedSample * normalizedSample
|
||||||
|
sampleCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if sampleCount == 0 {
|
||||||
|
return 0.0
|
||||||
|
}
|
||||||
|
return math.Sqrt(sum / float64(sampleCount))
|
||||||
|
}
|
||||||
|
|
||||||
|
// 使用滑动窗口检测静音
|
||||||
lastNonSilent := 0
|
lastNonSilent := 0
|
||||||
silenceThreshold := 0.01 // Adjust this threshold as needed
|
consecutiveSilentWindows := 0
|
||||||
|
requiredSilentWindows := minSilenceDuration / windowSize
|
||||||
|
|
||||||
for i := 0; i < samplesPerChannel; i++ {
|
for i := 0; i < samplesPerChannel; i += windowSize {
|
||||||
isSilent := true
|
endSample := i + windowSize
|
||||||
for ch := 0; ch < channels; ch++ {
|
if endSample > samplesPerChannel {
|
||||||
offset := i*int(fmtChunk.BlockAlign) + ch*bytesPerSample
|
endSample = samplesPerChannel
|
||||||
if offset+bytesPerSample > len(audioBytes) {
|
}
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Convert bytes to sample value
|
rms := calculateRMS(i, endSample)
|
||||||
var sample int16
|
|
||||||
if err := binary.Read(bytes.NewReader(audioBytes[offset:offset+bytesPerSample]), binary.LittleEndian, &sample); err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Normalize sample to [-1, 1] range
|
if rms > silenceThreshold {
|
||||||
normalizedSample := float64(sample) / 32768.0
|
// 检测到声音
|
||||||
if math.Abs(normalizedSample) > silenceThreshold {
|
lastNonSilent = endSample
|
||||||
isSilent = false
|
consecutiveSilentWindows = 0
|
||||||
|
} else {
|
||||||
|
// 检测到静音
|
||||||
|
consecutiveSilentWindows++
|
||||||
|
|
||||||
|
// 如果连续静音窗口足够多,认为找到了真正的静音开始点
|
||||||
|
if consecutiveSilentWindows >= requiredSilentWindows {
|
||||||
|
lastNonSilent = i
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !isSilent {
|
|
||||||
lastNonSilent = i
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add a small buffer (e.g., 0.1 seconds) after the last non-silent sample
|
// 添加缓冲区,但减少缓冲区大小
|
||||||
bufferSamples := int(float64(fmtChunk.SampleRate) * 0.1)
|
|
||||||
lastSample := lastNonSilent + bufferSamples
|
lastSample := lastNonSilent + bufferSamples
|
||||||
if lastSample > samplesPerChannel {
|
if lastSample > samplesPerChannel {
|
||||||
lastSample = samplesPerChannel
|
lastSample = samplesPerChannel
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 确保至少保留一些音频数据
|
||||||
|
if lastSample < int(fmtChunk.SampleRate)*2 { // 至少保留2秒
|
||||||
|
lastSample = int(fmtChunk.SampleRate) * 2
|
||||||
|
if lastSample > samplesPerChannel {
|
||||||
|
lastSample = samplesPerChannel
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Calculate new data size
|
// Calculate new data size
|
||||||
newDataSize := lastSample * int(fmtChunk.BlockAlign)
|
newDataSize := lastSample * int(fmtChunk.BlockAlign)
|
||||||
trimmedAudio := audioBytes[:newDataSize]
|
trimmedAudio := audioBytes[:newDataSize]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user