399. Cornell Birdcall Identification | birdsong-recognition
# Audio / spectrogram settings defined at module level.

# presumably the sampling rate in Hz — confirm against the audio loader
sr = 32_000

# Frame geometry: hop_length and win_length are read by signal_noise_split
# when it maps spectrogram columns back onto samples.
n_fft = 2_048
win_length = n_fft  # window is as long as the FFT frame
hop_length = 512

# presumably the mel filter-bank size and frequency range — confirm at use site
n_channel = 128
fmin = 20
fmax = sr // 2  # half of sr
def signal_noise_ratio(spec):
    """Return the fraction of cells that survive median-based thresholding.

    A copy of ``spec`` is binarised: every cell below 1.25x the median of its
    row or below 1.15x the median of its column is zeroed, and every
    remaining positive cell is set to 1. The binary map is cleaned with a
    3x3 median blur followed by a 3x3 morphological closing, and the sum of
    the result is divided by its number of elements.

    Args:
        spec: 2-D or 3-D array. The caller's array is not modified.
            NOTE(review): OpenCV's median blur with ksize 3 accepts 8-bit,
            16-bit unsigned or float32 input — presumably callers pass
            float32; confirm.

    Returns:
        Sum of the cleaned binary map divided by its total element count.
    """
    # The thresholding below writes in place, so work on a private copy.
    spec = spec.copy()

    # Both medians are taken before anything is zeroed, so the second
    # threshold is not skewed by the cells removed by the first one.
    col_median = np.median(spec, axis=0, keepdims=True)
    row_median = np.median(spec, axis=1, keepdims=True)

    spec[spec < row_median * 1.25] = 0.0
    spec[spec < col_median * 1.15] = 0.0
    spec[spec > 0] = 1.0

    # Drop isolated cells, then close small gaps inside the kept regions.
    spec = cv2.medianBlur(spec, 3)
    spec = cv2.morphologyEx(spec, cv2.MORPH_CLOSE, np.ones((3, 3), np.float32))

    # ``size`` is the product of every dimension, so it covers the 2-D and
    # 3-D cases alike without probing ``shape[2]`` inside a bare ``except``.
    return spec.sum() / spec.size
def signal_noise_split(audio):
    """Partition ``audio`` into "signal" samples and "noise" samples.

    A spectrogram of ``audio`` is binarised by keeping only cells that reach
    3x the median of their row and 3x the median of their column. The binary
    map is eroded and then dilated with a 4x4 structuring element, reduced
    to a per-column flag, widened, and finally expanded into a per-sample
    boolean mask.

    Args:
        audio: 1-D array of samples (it is indexed with a 1-D boolean mask).

    Returns:
        Tuple ``(signal, noise)``: the samples of ``audio`` selected by the
        mask, and the samples it rejects.
    """
    # presumably librosa's private spectrogram helper, with power=1.0
    # giving magnitudes — confirm.
    # NOTE(review): the literal 2048 / 512 match the module-level
    # win_length / hop_length that the mask construction below relies on.
    magnitude, _ = spectrum._spectrogram(
        y=audio, power=1.0, n_fft=2048, hop_length=512, win_length=2048
    )

    # Thresholds come from the untouched spectrogram; the in-place zeroing
    # only starts once both have been computed.
    column_floor = np.median(magnitude, axis=0, keepdims=True) * 3
    row_floor = np.median(magnitude, axis=1, keepdims=True) * 3
    magnitude[magnitude < row_floor] = 0.0
    magnitude[magnitude < column_floor] = 0.0
    magnitude[magnitude > 0] = 1

    # Erode then dilate with the same 4x4 element to remove small specks.
    square = np.ones((4, 4))
    cleaned = binary_erosion(magnitude, structure=square)
    cleaned = binary_dilation(cleaned, structure=square)

    # A column is active if any of its cells survived; widen active runs.
    active_columns = cleaned.any(axis=0)
    active_columns = binary_dilation(
        active_columns, structure=np.ones(4), iterations=2
    )

    # Each column flag is repeated hop_length times to get one flag per
    # sample, then stretched by the window/hop overlap with a shifted origin.
    overlap = win_length - hop_length
    sample_mask = np.repeat(active_columns, hop_length)
    sample_mask = binary_dilation(
        sample_mask, structure=np.ones(overlap), origin=-overlap // 2
    )
    sample_mask = sample_mask[: len(audio)]

    return audio[sample_mask], audio[~sample_mask]
如果模型对任何鸟类获得了置信度较高的预测,则在