%matplotlib inline
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
plt.style.use('seaborn-whitegrid')
from scipy import fftpack
f = 10 # Frequency, in cycles per second, or Hertz
fs = 100 # Sampling rate, or number of data points per second
Duration = 2 # Duration of the signal (seconds)
M = int(Duration * fs) # total number of time points
t = np.linspace(0, Duration-1/fs, M) # an array of timepoints
x = np.sin(2*np.pi*f* t) # the signal is sampled from a sine function
fig, ax = plt.subplots()
ax.plot(t, x, '-') # change '.' to '.-'
ax.set_xlabel('Time t [s]', fontsize=16)
ax.set_ylabel('Signal Amplitude', fontsize=16)
ax.set_title('Signal x is sampled from sin(2*3.14*f*t), f=10Hz, f_s is 100 Hz', fontsize=16)
Text(0.5, 1.0, 'Signal x is sampled from sin(2*3.14*f*t), f=10Hz, f_s is 100 Hz')
We now apply Discrete Fourier transform (DFT) to the signal. X= DFT(x)
To speed up the computation, the FFT algorithm is used
X = fftpack.fft(x) # X = DFT(x)
# the the absolute value of X which is DFT(x)
X_abs=np.abs(X)
#X_abs[k] is |X[k]|, the magnitude of the cos~sin component indexed by k
#print(X[X_abs.argmax()])
#compute the frequencies of the cos~sin components
freqs = fftpack.fftfreq(len(x)) * fs
#freqs[k] is the frequency (Hz) of the cos~sin component indexed by k
#fftpack.fftfreq modifies freqs[k] when k > N/2
#freqs = np.arange(0, N); freqs[N//2:]-=N; freqs =freqs*f_s/N
X
array([-7.38128238e-15-0.00000000e+00j, 1.80898291e-14-1.47628987e-14j,
3.16614610e-14+2.46608743e-14j, 3.08524344e-14-1.63347092e-14j,
8.19959442e-15+5.53232678e-14j, 1.86641649e-14+4.42040810e-14j,
-2.30635563e-14-1.32071780e-14j, -9.60768562e-15+4.32312904e-14j,
-3.08143819e-16+1.41757696e-14j, 1.54969947e-14+2.17168219e-14j,
-8.99703753e-15+1.93560777e-14j, -2.27631233e-14+8.54404980e-14j,
-9.70687516e-14+5.01684314e-15j, -5.23978299e-14-5.27372167e-14j,
2.39616397e-14-3.12488784e-16j, -1.41521879e-14-8.82928778e-15j,
-2.26317503e-15+8.00987153e-15j, -4.12626324e-14-9.67035408e-15j,
4.33590418e-15+1.34330709e-14j, -1.83434082e-14+1.36086649e-14j,
-9.89601126e-14-1.00000000e+02j, -8.79121193e-14-4.41137082e-14j,
1.43533083e-14-1.22907168e-13j, 4.94200476e-14+1.42529430e-14j,
-5.41645759e-16-2.18987620e-14j, 1.02603182e-14-7.12885736e-15j,
2.45472946e-14-4.26945914e-16j, -2.67274290e-14+6.44887795e-14j,
-1.14642231e-13-5.93833662e-14j, 4.44184432e-14-7.13861701e-14j,
-2.29476366e-14-3.39805215e-14j, 5.15177191e-14-2.01276703e-14j,
1.53058582e-15-9.44338986e-16j, 1.55411072e-14-4.42543906e-14j,
-2.32756956e-14+3.20190140e-14j, 1.67430660e-14-5.08498437e-14j,
2.40060267e-14-4.67230809e-14j, 2.37405641e-14+3.73420846e-14j,
2.06348651e-14-4.26758171e-14j, 2.68758756e-14+2.62446014e-14j,
-5.06089153e-16-2.01017356e-15j, -6.55377945e-15-3.62623691e-15j,
6.53974645e-15-1.42880898e-14j, 4.44573604e-14+2.27048539e-14j,
-1.79902064e-14-6.42101157e-16j, 9.64762548e-16-3.53594469e-15j,
1.58492552e-14+6.27899512e-15j, -2.14914442e-14+1.67426442e-14j,
1.76164547e-15+2.36294661e-15j, 2.83978810e-15-1.01059746e-14j,
1.59599998e-14-1.57385683e-14j, -3.36487357e-14+3.83600486e-14j,
-1.37837622e-14-5.75439715e-14j, 3.42153452e-14+4.23023155e-15j,
1.50684775e-14+3.76144406e-14j, -2.92429096e-14-1.68586210e-14j,
-4.79853772e-15-5.59748177e-15j, -8.80918931e-15-1.03110642e-14j,
-4.79183114e-15+5.02889631e-15j, -9.41887629e-15-2.58992379e-14j,
2.06149242e-14+9.36633764e-15j, -6.40924309e-14-1.99377686e-14j,
5.18594164e-15-1.27186586e-13j, 1.29475608e-13+3.37356503e-14j,
7.08593432e-16+2.34061996e-14j, -2.36558043e-14+4.12815574e-15j,
4.17835424e-14+2.59166300e-15j, 1.75584707e-14+2.24715493e-15j,
-2.56698354e-14+1.75210627e-14j, 2.10199009e-14-1.23902892e-14j,
-6.21868882e-15+8.65204264e-15j, 1.83096937e-14+1.27141546e-14j,
-7.56380110e-16+9.09543462e-15j, -2.08430411e-14-6.01319807e-15j,
2.08309283e-14+9.62075322e-15j, -2.27492396e-14+1.75292684e-15j,
-1.42384482e-14-2.38737255e-14j, 1.22839405e-14+1.51495000e-14j,
1.23410145e-14-1.81409799e-14j, 6.19618742e-15-5.24883143e-15j,
-6.58955695e-15-7.10542736e-15j, -1.14773153e-14+1.39873425e-15j,
-3.05848072e-15-8.67856280e-15j, 3.81784763e-14-1.31607167e-14j,
-5.18816178e-15-1.59457400e-14j, 9.13122892e-15-4.95807060e-15j,
3.54922581e-14+1.10111091e-14j, -1.79486515e-14+2.01886752e-14j,
6.62622094e-15-1.95486514e-15j, -1.41527563e-15-1.54245140e-14j,
2.82339712e-14-1.32687660e-14j, 1.00975372e-14+1.96798353e-14j,
-1.22301668e-14+9.10313933e-15j, 1.84324341e-14+1.05580588e-14j,
2.18371803e-14+3.84896897e-14j, -8.24694487e-14-1.88565868e-14j,
-4.11157984e-15-3.47789987e-14j, 6.38544456e-14-2.37782686e-14j,
-1.36116979e-14+2.04427412e-14j, 1.92105482e-15+1.43664089e-14j,
7.32685882e-15-0.00000000e+00j, 1.92105482e-15-1.43664089e-14j,
-1.36116979e-14-2.04427412e-14j, 6.38544456e-14+2.37782686e-14j,
-4.11157984e-15+3.47789987e-14j, -8.24694487e-14+1.88565868e-14j,
2.18371803e-14-3.84896897e-14j, 1.84324341e-14-1.05580588e-14j,
-1.22301668e-14-9.10313933e-15j, 1.00975372e-14-1.96798353e-14j,
2.82339712e-14+1.32687660e-14j, -1.41527563e-15+1.54245140e-14j,
6.62622094e-15+1.95486514e-15j, -1.79486515e-14-2.01886752e-14j,
3.54922581e-14-1.10111091e-14j, 9.13122892e-15+4.95807060e-15j,
-5.18816178e-15+1.59457400e-14j, 3.81784763e-14+1.31607167e-14j,
-3.05848072e-15+8.67856280e-15j, -1.14773153e-14-1.39873425e-15j,
-6.58955695e-15+7.10542736e-15j, 6.19618742e-15+5.24883143e-15j,
1.23410145e-14+1.81409799e-14j, 1.22839405e-14-1.51495000e-14j,
-1.42384482e-14+2.38737255e-14j, -2.27492396e-14-1.75292684e-15j,
2.08309283e-14-9.62075322e-15j, -2.08430411e-14+6.01319807e-15j,
-7.56380110e-16-9.09543462e-15j, 1.83096937e-14-1.27141546e-14j,
-6.21868882e-15-8.65204264e-15j, 2.10199009e-14+1.23902892e-14j,
-2.56698354e-14-1.75210627e-14j, 1.75584707e-14-2.24715493e-15j,
4.17835424e-14-2.59166300e-15j, -2.36558043e-14-4.12815574e-15j,
7.08593432e-16-2.34061996e-14j, 1.29475608e-13-3.37356503e-14j,
5.18594164e-15+1.27186586e-13j, -6.40924309e-14+1.99377686e-14j,
2.06149242e-14-9.36633764e-15j, -9.41887629e-15+2.58992379e-14j,
-4.79183114e-15-5.02889631e-15j, -8.80918931e-15+1.03110642e-14j,
-4.79853772e-15+5.59748177e-15j, -2.92429096e-14+1.68586210e-14j,
1.50684775e-14-3.76144406e-14j, 3.42153452e-14-4.23023155e-15j,
-1.37837622e-14+5.75439715e-14j, -3.36487357e-14-3.83600486e-14j,
1.59599998e-14+1.57385683e-14j, 2.83978810e-15+1.01059746e-14j,
1.76164547e-15-2.36294661e-15j, -2.14914442e-14-1.67426442e-14j,
1.58492552e-14-6.27899512e-15j, 9.64762548e-16+3.53594469e-15j,
-1.79902064e-14+6.42101157e-16j, 4.44573604e-14-2.27048539e-14j,
6.53974645e-15+1.42880898e-14j, -6.55377945e-15+3.62623691e-15j,
-5.06089153e-16+2.01017356e-15j, 2.68758756e-14-2.62446014e-14j,
2.06348651e-14+4.26758171e-14j, 2.37405641e-14-3.73420846e-14j,
2.40060267e-14+4.67230809e-14j, 1.67430660e-14+5.08498437e-14j,
-2.32756956e-14-3.20190140e-14j, 1.55411072e-14+4.42543906e-14j,
1.53058582e-15+9.44338986e-16j, 5.15177191e-14+2.01276703e-14j,
-2.29476366e-14+3.39805215e-14j, 4.44184432e-14+7.13861701e-14j,
-1.14642231e-13+5.93833662e-14j, -2.67274290e-14-6.44887795e-14j,
2.45472946e-14+4.26945914e-16j, 1.02603182e-14+7.12885736e-15j,
-5.41645759e-16+2.18987620e-14j, 4.94200476e-14-1.42529430e-14j,
1.43533083e-14+1.22907168e-13j, -8.79121193e-14+4.41137082e-14j,
-9.89601126e-14+1.00000000e+02j, -1.83434082e-14-1.36086649e-14j,
4.33590418e-15-1.34330709e-14j, -4.12626324e-14+9.67035408e-15j,
-2.26317503e-15-8.00987153e-15j, -1.41521879e-14+8.82928778e-15j,
2.39616397e-14+3.12488784e-16j, -5.23978299e-14+5.27372167e-14j,
-9.70687516e-14-5.01684314e-15j, -2.27631233e-14-8.54404980e-14j,
-8.99703753e-15-1.93560777e-14j, 1.54969947e-14-2.17168219e-14j,
-3.08143819e-16-1.41757696e-14j, -9.60768562e-15-4.32312904e-14j,
-2.30635563e-14+1.32071780e-14j, 1.86641649e-14-4.42040810e-14j,
8.19959442e-15-5.53232678e-14j, 3.08524344e-14+1.63347092e-14j,
3.16614610e-14-2.46608743e-14j, 1.80898291e-14+1.47628987e-14j])
fig, ax = plt.subplots()
ax.stem(freqs, X_abs) # it plots vertical lines
ax.set_xlabel('Frequency in Hertz [Hz]',fontsize=16)
ax.set_ylabel('Magnitude',fontsize=16)
ax.set_xlim(0, fs / 2)
ax.set_ylim(-5, 110)
ax.set_title('Magnitude Spectrum: |X|, X = DFT(x)',fontsize=16)
We can use DFT to analyze the frequency components of a signal
f = 10 # Signal Frequency
fs = 100 # Sampling rate, the number of smapled data points per second
Duration = 2
M = int(Duration * fs) # total number of time points obtained in 2 (seconds)
t = np.linspace(0, Duration-1/fs, M) # an array of timepoints
x = np.sin(2*np.pi*f*t) + 0.5*np.sin(2*np.pi*(2*f)*t)
#
fig, ax = plt.subplots()
ax.plot(t, x, '-')
ax.set_xlabel('Time t [s]', fontsize=16)
ax.set_ylabel('Signal Amplitude', fontsize=16)
ax.set_title('x = sin(2*3.14*f*t) + 0.5*sin(2*3.14*2f*t), f=10Hz', fontsize=16)
Text(0.5, 1.0, 'x = sin(2*3.14*f*t) + 0.5*sin(2*3.14*2f*t), f=10Hz')
# X is DFT(x)
X = fftpack.fft(x)
X_abs=np.abs(X)
#
fig, ax = plt.subplots()
freqs = fftpack.fftfreq(len(x)) * fs
ax.stem(freqs, X_abs) # it plots vertical lines
ax.set_xlabel('Frequency in Hertz [Hz]', fontsize=16)
ax.set_ylabel('Magnitude', fontsize=16)
ax.set_xlim(0, fs / 2)
ax.set_ylim(-5, 110)
ax.set_title('Magnitude Spectrum: |X|, X = DFT(x)', fontsize=16)
Text(0.5, 1.0, 'Magnitude Spectrum: |X|, X = DFT(x)')
Therefore, we can use DFT to analyze the frequency components of a signal
#load the voice signal x1 from the audio file 'm01ae.wav'
import scipy.io.wavfile
fs1, x1 = scipy.io.wavfile.read('m01ae.wav')
Duration1 = x1.shape[0]/fs1
M1 = int(Duration1 * fs1) # total number of time points
t1 = np.linspace(0, Duration1-1/fs1, M1) # an array of timepoints
print('sample_rate is', fs1, 'Hz')
print('Audio duration is', Duration1, 'second')
print('x1.shape is', x1.shape)
print('t1.shape is', t1.shape)
sample_rate is 16000 Hz Audio duration is 0.670625 second x1.shape is (10730,) t1.shape is (10730,)
#play the audio
from IPython.display import Audio
Audio('m01ae.wav')
fig, ax = plt.subplots()
ax.plot(t1, x1, '-b')
ax.set_xlabel('Time t (seconds)'); ax.set_ylabel('Signal Amplitude');
ax.set_title('x1: the signal - "had"')
Text(0.5, 1.0, 'x1: the signal - "had"')
# Apply DFT to the voice signal
X1 = fftpack.fft(x1)
freqs1 = fftpack.fftfreq(len(X1)) * fs1
print('X1.shape', X1.shape)
#
fig, ax = plt.subplots()
ax.plot(freqs1, np.abs(X1), '.')
ax.set_xlabel('Frequency (Hz)', fontsize=16)
ax.set_ylabel('Magnitude', fontsize=16)
ax.set_xlim(0, 1000)
ax.set_title('Magnitude Spectrum: X1 = DFT(x1), x1 is "had"', fontsize=16)
ax.grid(True)
X1.shape (10730,)
The above signal x has a strong frequency component ~650Hz
This is the signature of a in 'had'
#load another voice signal x2 from the audio file 'm02ae.wav'
fs2, x2 = scipy.io.wavfile.read('m02ae.wav')
Duration2 = x2.shape[0]/fs2
M2 = int(Duration2*fs2)
t2 = np.linspace(0, Duration2-1/fs2, M2) # an array of timepoints
print('sample_rate is', fs2, 'Hz')
print('Audio length is', Duration2, 'second')
print('x2.shape is', x2.shape)
print('t2.shape is', t2.shape)
#
fig, ax = plt.subplots(1, 1)
ax.plot(t2, x2, '-b')
ax.set_xlabel('Time t (seconds)')
ax.set_ylabel('Signal Amplitude')
ax.set_title('x2: the signal - "had" from another person')
Audio('m02ae.wav')
sample_rate is 16000 Hz Audio length is 0.502375 second x2.shape is (8038,) t2.shape is (8038,)
X2 = fftpack.fft(x2)
freqs2 = fftpack.fftfreq(len(X2)) * fs2
print('signal_dft.shape', X2.shape)
fig, ax = plt.subplots()
ax.plot(freqs2, np.abs(X2), '.')
ax.set_xlabel('Frequency (Hz)')
ax.set_ylabel('Magnitude of Frequency Component')
ax.set_title('Magnitude Spectrum: X2 = DFT(x2), x2 is "had"', fontsize=16)
ax.set_xlim(0, 1000)
signal_dft.shape (8038,)
(0.0, 1000.0)
#let's load and analyze the third signal
fs3, x3 = scipy.io.wavfile.read('m01ei.wav')
Duration3 = x3.shape[0]/fs3
M3 = int(Duration3*fs3)
t3 = np.linspace(0, Duration3-1/fs3, M3) # an array of timepoints
X3 = fftpack.fft(x3)
freqs3 = fftpack.fftfreq(len(X3)) * fs3
#
fig, ax = plt.subplots(1, 2, constrained_layout=True)
ax[0].plot(t3, x3, '-b')
ax[0].set_xlabel('Time t (seconds)')
ax[0].set_ylabel('Signal Amplitude')
ax[0].set_title('x3: the signal - "hayed"')
ax[1].plot(freqs3, np.abs(X3), '.')
ax[1].set_xlabel('Frequency (Hz)')
ax[1].set_ylabel('Magnitude of Frequency Component')
ax[1].set_xlim(0, 1000)
ax[1].set_title('Magnitude Spectrum : X3 = DFT(x3)')
Audio('m01ei.wav')
The above signal x3 has a strong frequency component ~500Hz
This is the signature of ay in 'hayed'
#let's load and analyze the fourth signal
fs4, x4 = scipy.io.wavfile.read('m02ei.wav')
Duration4 = x4.shape[0]/fs4
M4 = int(Duration4*fs4)
t4 = np.linspace(0, Duration4-1/fs4, M4) # an array of timepoints in the time intervel [0, t4_max]
X4 = fftpack.fft(x4)
freqs4 = fftpack.fftfreq(len(X4)) * fs4
#
fig, ax = plt.subplots(1, 2, constrained_layout=True)
ax[0].plot(t4, x4, '-b')
ax[0].set_xlabel('Time t (seconds)')
ax[0].set_ylabel('Signal Amplitude')
ax[0].set_title('x4: the signal - "hayed"')
ax[1].plot(freqs4, np.abs(X4), '.')
ax[1].set_xlabel('Frequency (Hz)')
ax[1].set_ylabel('Magnitude of Frequency Component')
ax[1].set_xlim(0, 1000)
ax[1].set_title('Magnitude Spectrum: X4 = DFT(x4)')
Audio('m02ei.wav')
fig, ax = plt.subplots(4, 2, constrained_layout=True, figsize=(12,12))
ax[0,0].plot(t1, x1, '-b')
ax[0,0].set_title('x1: had')
ax[0,1].plot(freqs1, np.abs(X1), '.')
ax[0,1].set_title('X1 = DFT(x1)')
ax[0,1].set_xlim(0, 1000)
ax[1,0].plot(t2, x2, '-b')
ax[1,0].set_title('x2: had')
ax[1,1].plot(freqs2, np.abs(X2), '.')
ax[1,1].set_title('X2 = DFT(x2)')
ax[1,1].set_xlim(0, 1000)
ax[2,0].plot(t3, x3, '-b')
ax[2,0].set_title('x3: hayed')
ax[2,1].plot(freqs3, np.abs(X3), '.')
ax[2,1].set_title('X3 = DFT(x3)')
ax[2,1].set_xlim(0, 1000)
ax[3,0].plot(t4, x4, '-b')
ax[3,0].set_title('x4: hayed')
ax[3,1].plot(freqs4, np.abs(X4), '.')
ax[3,1].set_title('X4 = DFT(x4)')
ax[3,1].set_xlim(0, 1000)
(0.0, 1000.0)
It seems we can use a threshold of 600 Hz to differentiate between the two voices: 'had' and 'hayed'