-
Notifications
You must be signed in to change notification settings - Fork 17
/
stftpitchshift.py
137 lines (99 loc) · 4.32 KB
/
stftpitchshift.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
from stftpitchshift.cepster import lifter
from stftpitchshift.normalizer import normalize
from stftpitchshift.pitcher import shiftpitch
from stftpitchshift.resampler import linear as resample
from stftpitchshift.stft import stft, istft
from stftpitchshift.vocoder import encode, decode
import numpy as np
class StftPitchShift:
    '''
    Short-time Fourier transform (STFT) based pitch shifting.
    '''

    def __init__(self, framesize, hopsize, samplerate):
        '''
        :param framesize: The STFT frame size in samples.
        :param hopsize: The STFT hop size in samples.
        :param samplerate: The sample rate of the signal in hertz.
        '''

        self.framesize = framesize
        self.hopsize = hopsize
        self.samplerate = samplerate

    def shiftpitch(self, input, factors = 1, quefrency = 0, distortion = 1, normalization = False):
        '''
        Processes a one-dimensional array of type `numpy.floating` or `numpy.integer`.
        Returns the resulting array with the same dtype and shape, but at least 1D.

        Integer input is scaled to the range [-1, +1] using the limits of its dtype
        before processing, and the result is scaled back, rounded to the nearest
        integer and clipped to the same limits afterwards.

        :param input: The input signal.
        :param factors: The fractional pitch shifting factors.
        :param quefrency: The optional formant lifter quefrency in seconds.
        :param distortion: The fractional timbre shifting factor.
        :param normalization: Optionally enable spectral rms normalization.
        :return: The output signal of the equal size.
        :raises ValueError: If the input has more than one non-singleton dimension.
        :raises TypeError: If the input dtype is neither floating nor integer.
        '''

        input = np.atleast_1d(input)

        # remember input type and shape
        # to apply to output
        dtype = input.dtype
        shape = input.shape

        # drop singleton dimensions, but keep the signal at least 1D,
        # since squeezing a single sample would otherwise yield a 0D array
        # and reject a perfectly valid one-dimensional input
        input = np.atleast_1d(np.squeeze(input))

        if input.ndim != 1:
            raise ValueError(f'Invalid input shape {shape}, ' +
                             f'expected a one-dimensional array!')

        isinteger = np.issubdtype(dtype, np.integer)

        # silently convert integer input to normalized float
        # according to issue #36
        if isinteger:
            a, b = np.iinfo(dtype).min, np.iinfo(dtype).max
            input = input.astype(float)
            input = (input - a) / (b - a)
            input = input * 2 - 1
        elif not np.issubdtype(dtype, np.floating):
            raise TypeError(f'Invalid input data type {dtype}, ' +
                            f'expected {np.floating} or {np.integer}!')

        def isnotnormal(x):
            # flag values that are infinite, nan or smaller in magnitude
            # than the smallest positive normal number of the dtype
            return (np.isinf(x)) | \
                   (np.isnan(x)) | \
                   (abs(x) < np.finfo(x.dtype).tiny)

        framesize = self.framesize
        hopsize = self.hopsize
        samplerate = self.samplerate

        factors = np.asarray(factors).flatten()

        # quefrency in seconds to samples
        quefrency = int(quefrency * samplerate)

        frames = stft(input, framesize, hopsize)
        frames = encode(frames, framesize, hopsize, samplerate)

        if normalization:
            # keep the unmodified frames as normalization reference
            frames0 = frames.copy()

        if quefrency:

            envelopes = lifter(frames, quefrency)
            mask = isnotnormal(envelopes)

            # divide the envelope out of the real part and
            # zero the bins where the division is not meaningful
            frames.real /= envelopes
            frames.real[mask] = 0

            if distortion != 1:

                envelopes[mask] = 0

                for i, envelope in enumerate(envelopes):
                    envelopes[i] = resample(envelope, distortion)

                # the resampled envelopes need a fresh mask
                mask = isnotnormal(envelopes)

            frames = shiftpitch(frames, factors, samplerate)

            # multiply the (optionally resampled) envelope back in
            frames.real *= envelopes
            frames.real[mask] = 0

        else:

            frames = shiftpitch(frames, factors, samplerate)

        if normalization:
            frames = normalize(frames, frames0)

        frames = decode(frames, framesize, hopsize, samplerate)
        output = istft(frames, framesize, hopsize)

        # disable reference count check on resize,
        # since the output variable owns the data
        # returned by istft (see also issue #31)
        output.resize(shape, refcheck=False)

        # silently convert output back to integer
        # according to issue #36
        if isinteger:
            a, b = np.iinfo(dtype).min, np.iinfo(dtype).max
            output = (output + 1) / 2
            output = output * (b - a) + a
            # round to the nearest integer before casting, since a plain cast
            # truncates toward zero and float round-off could then produce
            # samples that are off by one
            output = np.rint(output).clip(a, b).astype(dtype)
        # otherwise restore the original float type
        elif output.dtype != dtype:
            output = output.astype(dtype)

        assert output.dtype == dtype
        assert output.shape == shape

        return output