VampPluginSDK  2.1
FixedTempoEstimator.cpp
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2 
3 /*
4  Vamp
5 
6  An API for audio analysis and feature extraction plugins.
7 
8  Centre for Digital Music, Queen Mary, University of London.
9  Copyright 2006-2009 Chris Cannam and QMUL.
10 
11  Permission is hereby granted, free of charge, to any person
12  obtaining a copy of this software and associated documentation
13  files (the "Software"), to deal in the Software without
14  restriction, including without limitation the rights to use, copy,
15  modify, merge, publish, distribute, sublicense, and/or sell copies
16  of the Software, and to permit persons to whom the Software is
17  furnished to do so, subject to the following conditions:
18 
19  The above copyright notice and this permission notice shall be
20  included in all copies or substantial portions of the Software.
21 
22  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
26  ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
27  CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 
30  Except as contained in this notice, the names of the Centre for
31  Digital Music; Queen Mary, University of London; and Chris Cannam
32  shall not be used in advertising or otherwise to promote the sale,
33  use or other dealings in this Software without prior written
34  authorization.
35 */
36 
37 #include "FixedTempoEstimator.h"
38 
39 using std::string;
40 using std::vector;
41 using std::cerr;
42 using std::endl;
43 
44 using Vamp::RealTime;
45 
46 #include <cmath>
47 #include <cstdio>
48 
50 // this class just avoids us having to declare any data members in the header
51 {
52 public:
53  D(float inputSampleRate);
54  ~D();
55 
56  size_t getPreferredStepSize() const { return 64; }
57  size_t getPreferredBlockSize() const { return 256; }
58 
60  float getParameter(string id) const;
61  void setParameter(string id, float value);
62 
64 
65  bool initialise(size_t channels, size_t stepSize, size_t blockSize);
66  void reset();
67  FeatureSet process(const float *const *, RealTime);
69 
70 private:
71  void calculate();
73 
74  float lag2tempo(int);
75  int tempo2lag(float);
76 
78  size_t m_stepSize;
79  size_t m_blockSize;
80 
81  float m_minbpm;
82  float m_maxbpm;
83  float m_maxdflen;
84 
86 
87  size_t m_dfsize;
88  float *m_df;
89  float *m_r;
90  float *m_fr;
91  float *m_t;
92  size_t m_n;
93 
96 };
97 
98 FixedTempoEstimator::D::D(float inputSampleRate) :
99  m_inputSampleRate(inputSampleRate),
100  m_stepSize(0),
101  m_blockSize(0),
102  m_minbpm(50),
103  m_maxbpm(190),
104  m_maxdflen(10),
105  m_priorMagnitudes(0),
106  m_df(0),
107  m_r(0),
108  m_fr(0),
109  m_t(0),
110  m_n(0)
111 {
112 }
113 
115 {
116  delete[] m_priorMagnitudes;
117  delete[] m_df;
118  delete[] m_r;
119  delete[] m_fr;
120  delete[] m_t;
121 }
122 
125 {
126  ParameterList list;
127 
129  d.identifier = "minbpm";
130  d.name = "Minimum estimated tempo";
131  d.description = "Minimum beat-per-minute value which the tempo estimator is able to return";
132  d.unit = "bpm";
133  d.minValue = 10;
134  d.maxValue = 360;
135  d.defaultValue = 50;
136  d.isQuantized = false;
137  list.push_back(d);
138 
139  d.identifier = "maxbpm";
140  d.name = "Maximum estimated tempo";
141  d.description = "Maximum beat-per-minute value which the tempo estimator is able to return";
142  d.defaultValue = 190;
143  list.push_back(d);
144 
145  d.identifier = "maxdflen";
146  d.name = "Input duration to study";
147  d.description = "Length of audio input, in seconds, which should be taken into account when estimating tempo. There is no need to supply the plugin with any further input once this time has elapsed since the start of the audio. The tempo estimator may use only the first part of this, up to eight times the slowest beat duration: increasing this value further than that is unlikely to improve results.";
148  d.unit = "s";
149  d.minValue = 2;
150  d.maxValue = 40;
151  d.defaultValue = 10;
152  list.push_back(d);
153 
154  return list;
155 }
156 
157 float
159 {
160  if (id == "minbpm") {
161  return m_minbpm;
162  } else if (id == "maxbpm") {
163  return m_maxbpm;
164  } else if (id == "maxdflen") {
165  return m_maxdflen;
166  }
167  return 0.f;
168 }
169 
170 void
172 {
173  if (id == "minbpm") {
174  m_minbpm = value;
175  } else if (id == "maxbpm") {
176  m_maxbpm = value;
177  } else if (id == "maxdflen") {
178  m_maxdflen = value;
179  }
180 }
181 
182 static int TempoOutput = 0;
183 static int CandidatesOutput = 1;
184 static int DFOutput = 2;
185 static int ACFOutput = 3;
186 static int FilteredACFOutput = 4;
187 
190 {
191  OutputList list;
192 
194  d.identifier = "tempo";
195  d.name = "Tempo";
196  d.description = "Estimated tempo";
197  d.unit = "bpm";
198  d.hasFixedBinCount = true;
199  d.binCount = 1;
200  d.hasKnownExtents = false;
201  d.isQuantized = false;
204  d.hasDuration = true; // our returned tempo spans a certain range
205  list.push_back(d);
206 
207  d.identifier = "candidates";
208  d.name = "Tempo candidates";
209  d.description = "Possible tempo estimates, one per bin with the most likely in the first bin";
210  d.unit = "bpm";
211  d.hasFixedBinCount = false;
212  list.push_back(d);
213 
214  d.identifier = "detectionfunction";
215  d.name = "Detection Function";
216  d.description = "Onset detection function";
217  d.unit = "";
218  d.hasFixedBinCount = 1;
219  d.binCount = 1;
220  d.hasKnownExtents = true;
221  d.minValue = 0.0;
222  d.maxValue = 1.0;
223  d.isQuantized = false;
224  d.quantizeStep = 0.0;
226  if (m_stepSize) {
227  d.sampleRate = m_inputSampleRate / m_stepSize;
228  } else {
230  }
231  d.hasDuration = false;
232  list.push_back(d);
233 
234  d.identifier = "acf";
235  d.name = "Autocorrelation Function";
236  d.description = "Autocorrelation of onset detection function";
237  d.hasKnownExtents = false;
238  d.unit = "r";
239  list.push_back(d);
240 
241  d.identifier = "filtered_acf";
242  d.name = "Filtered Autocorrelation";
243  d.description = "Filtered autocorrelation of onset detection function";
244  d.unit = "r";
245  list.push_back(d);
246 
247  return list;
248 }
249 
250 bool
251 FixedTempoEstimator::D::initialise(size_t, size_t stepSize, size_t blockSize)
252 {
253  m_stepSize = stepSize;
254  m_blockSize = blockSize;
255 
256  float dfLengthSecs = m_maxdflen;
257  m_dfsize = (dfLengthSecs * m_inputSampleRate) / m_stepSize;
258 
259  m_priorMagnitudes = new float[m_blockSize/2];
260  m_df = new float[m_dfsize];
261 
262  for (size_t i = 0; i < m_blockSize/2; ++i) {
263  m_priorMagnitudes[i] = 0.f;
264  }
265  for (size_t i = 0; i < m_dfsize; ++i) {
266  m_df[i] = 0.f;
267  }
268 
269  m_n = 0;
270 
271  return true;
272 }
273 
274 void
276 {
277  if (!m_priorMagnitudes) return;
278 
279  for (size_t i = 0; i < m_blockSize/2; ++i) {
280  m_priorMagnitudes[i] = 0.f;
281  }
282  for (size_t i = 0; i < m_dfsize; ++i) {
283  m_df[i] = 0.f;
284  }
285 
286  delete[] m_r;
287  m_r = 0;
288 
289  delete[] m_fr;
290  m_fr = 0;
291 
292  delete[] m_t;
293  m_t = 0;
294 
295  m_n = 0;
296 
297  m_start = RealTime::zeroTime;
298  m_lasttime = RealTime::zeroTime;
299 }
300 
302 FixedTempoEstimator::D::process(const float *const *inputBuffers, RealTime ts)
303 {
304  FeatureSet fs;
305 
306  if (m_stepSize == 0) {
307  cerr << "ERROR: FixedTempoEstimator::process: "
308  << "FixedTempoEstimator has not been initialised"
309  << endl;
310  return fs;
311  }
312 
313  if (m_n == 0) m_start = ts;
314  m_lasttime = ts;
315 
316  if (m_n == m_dfsize) {
317  // If we have seen enough input, do the estimation and return
318  calculate();
319  fs = assembleFeatures();
320  ++m_n;
321  return fs;
322  }
323 
324  // If we have seen more than enough, just discard and return!
325  if (m_n > m_dfsize) return FeatureSet();
326 
327  float value = 0.f;
328 
329  // m_df will contain an onset detection function based on the rise
330  // in overall power from one spectral frame to the next --
331  // simplistic but reasonably effective for our purposes.
332 
333  for (size_t i = 1; i < m_blockSize/2; ++i) {
334 
335  float real = inputBuffers[0][i*2];
336  float imag = inputBuffers[0][i*2 + 1];
337 
338  float sqrmag = real * real + imag * imag;
339  value += fabsf(sqrmag - m_priorMagnitudes[i]);
340 
341  m_priorMagnitudes[i] = sqrmag;
342  }
343 
344  m_df[m_n] = value;
345 
346  ++m_n;
347  return fs;
348 }
349 
352 {
353  FeatureSet fs;
354  if (m_n > m_dfsize) return fs;
355  calculate();
356  fs = assembleFeatures();
357  ++m_n;
358  return fs;
359 }
360 
361 float
363 {
364  return 60.f / ((lag * m_stepSize) / m_inputSampleRate);
365 }
366 
367 int
369 {
370  return ((60.f / tempo) * m_inputSampleRate) / m_stepSize;
371 }
372 
373 void
375 {
376  if (m_r) {
377  cerr << "FixedTempoEstimator::calculate: calculation already happened?" << endl;
378  return;
379  }
380 
381  if (m_n < m_dfsize / 9 &&
382  m_n < (1.0 * m_inputSampleRate) / m_stepSize) { // 1 second
383  cerr << "FixedTempoEstimator::calculate: Input is too short" << endl;
384  return;
385  }
386 
387  // This function takes m_df (the detection function array filled
388  // out in process()) and calculates m_r (the raw autocorrelation)
389  // and m_fr (the filtered autocorrelation from whose peaks tempo
390  // estimates will be taken).
391 
392  int n = m_n; // length of actual df array (m_dfsize is the theoretical max)
393 
394  m_r = new float[n/2]; // raw autocorrelation
395  m_fr = new float[n/2]; // filtered autocorrelation
396  m_t = new float[n/2]; // averaged tempo estimate for each lag value
397 
398  for (int i = 0; i < n/2; ++i) {
399  m_r[i] = 0.f;
400  m_fr[i] = 0.f;
401  m_t[i] = lag2tempo(i);
402  }
403 
404  // Calculate the raw autocorrelation of the detection function
405 
406  for (int i = 0; i < n/2; ++i) {
407 
408  for (int j = i; j < n; ++j) {
409  m_r[i] += m_df[j] * m_df[j - i];
410  }
411 
412  m_r[i] /= n - i - 1;
413  }
414 
415  // Filter the autocorrelation and average out the tempo estimates
416 
417  float related[] = { 0.5, 2, 4, 8 };
418 
419  for (int i = 1; i < n/2-1; ++i) {
420 
421  m_fr[i] = m_r[i];
422 
423  int div = 1;
424 
425  for (int j = 0; j < int(sizeof(related)/sizeof(related[0])); ++j) {
426 
427  // Check for an obvious peak at each metrically related lag
428 
429  int k0 = int(i * related[j] + 0.5);
430 
431  if (k0 >= 0 && k0 < int(n/2)) {
432 
433  int kmax = 0, kmin = 0;
434  float kvmax = 0, kvmin = 0;
435  bool have = false;
436 
437  for (int k = k0 - 1; k <= k0 + 1; ++k) {
438 
439  if (k < 0 || k >= n/2) continue;
440 
441  if (!have || (m_r[k] > kvmax)) { kmax = k; kvmax = m_r[k]; }
442  if (!have || (m_r[k] < kvmin)) { kmin = k; kvmin = m_r[k]; }
443 
444  have = true;
445  }
446 
447  // Boost the original lag according to the strongest
448  // value found close to this related lag
449 
450  m_fr[i] += m_r[kmax] / 5;
451 
452  if ((kmax == 0 || m_r[kmax] > m_r[kmax-1]) &&
453  (kmax == n/2-1 || m_r[kmax] > m_r[kmax+1]) &&
454  kvmax > kvmin * 1.05) {
455 
456  // The strongest value close to the related lag is
457  // also a pretty good looking peak, so use it to
458  // improve our tempo estimate for the original lag
459 
460  m_t[i] = m_t[i] + lag2tempo(kmax) * related[j];
461  ++div;
462  }
463  }
464  }
465 
466  m_t[i] /= div;
467 
468  // Finally apply a primitive perceptual weighting (to prefer
469  // tempi of around 120-130)
470 
471  float weight = 1.f - fabsf(128.f - lag2tempo(i)) * 0.005;
472  if (weight < 0.f) weight = 0.f;
473  weight = weight * weight * weight;
474 
475  m_fr[i] += m_fr[i] * (weight / 3);
476  }
477 }
478 
481 {
482  FeatureSet fs;
483  if (!m_r) return fs; // No autocorrelation: no results
484 
485  Feature feature;
486  feature.hasTimestamp = true;
487  feature.hasDuration = false;
488  feature.label = "";
489  feature.values.clear();
490  feature.values.push_back(0.f);
491 
492  char buffer[40];
493 
494  int n = m_n;
495 
496  for (int i = 0; i < n; ++i) {
497 
498  // Return the detection function in the DF output
499 
500  feature.timestamp = m_start +
501  RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate);
502  feature.values[0] = m_df[i];
503  feature.label = "";
504  fs[DFOutput].push_back(feature);
505  }
506 
507  for (int i = 1; i < n/2; ++i) {
508 
509  // Return the raw autocorrelation in the ACF output, each
510  // value labelled according to its corresponding tempo
511 
512  feature.timestamp = m_start +
513  RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate);
514  feature.values[0] = m_r[i];
515  sprintf(buffer, "%.1f bpm", lag2tempo(i));
516  if (i == n/2-1) feature.label = "";
517  else feature.label = buffer;
518  fs[ACFOutput].push_back(feature);
519  }
520 
521  float t0 = m_minbpm; // our minimum detected tempo
522  float t1 = m_maxbpm; // our maximum detected tempo
523 
524  int p0 = tempo2lag(t1);
525  int p1 = tempo2lag(t0);
526 
527  std::map<float, int> candidates;
528 
529  for (int i = p0; i <= p1 && i+1 < n/2; ++i) {
530 
531  if (m_fr[i] > m_fr[i-1] &&
532  m_fr[i] > m_fr[i+1]) {
533 
534  // This is a peak in the filtered autocorrelation: stick
535  // it into the map from filtered autocorrelation to lag
536  // index -- this sorts our peaks by filtered acf value
537 
538  candidates[m_fr[i]] = i;
539  }
540 
541  // Also return the filtered autocorrelation in its own output
542 
543  feature.timestamp = m_start +
544  RealTime::frame2RealTime(i * m_stepSize, m_inputSampleRate);
545  feature.values[0] = m_fr[i];
546  sprintf(buffer, "%.1f bpm", lag2tempo(i));
547  if (i == p1 || i == n/2-2) feature.label = "";
548  else feature.label = buffer;
549  fs[FilteredACFOutput].push_back(feature);
550  }
551 
552  if (candidates.empty()) {
553  cerr << "No tempo candidates!" << endl;
554  return fs;
555  }
556 
557  feature.hasTimestamp = true;
558  feature.timestamp = m_start;
559 
560  feature.hasDuration = true;
561  feature.duration = m_lasttime - m_start;
562 
563  // The map contains only peaks and is sorted by filtered acf
564  // value, so the final element in it is our "best" tempo guess
565 
566  std::map<float, int>::const_iterator ci = candidates.end();
567  --ci;
568  int maxpi = ci->second;
569 
570  if (m_t[maxpi] > 0) {
571 
572  // This lag has an adjusted tempo from the averaging process:
573  // use it
574 
575  feature.values[0] = m_t[maxpi];
576 
577  } else {
578 
579  // shouldn't happen -- it would imply that this high value was
580  // not a peak!
581 
582  feature.values[0] = lag2tempo(maxpi);
583  cerr << "WARNING: No stored tempo for index " << maxpi << endl;
584  }
585 
586  sprintf(buffer, "%.1f bpm", feature.values[0]);
587  feature.label = buffer;
588 
589  // Return the best tempo in the main output
590 
591  fs[TempoOutput].push_back(feature);
592 
593  // And return the other estimates (up to the arbitrarily chosen
594  // number of 10 of them) in the candidates output
595 
596  feature.values.clear();
597  feature.label = "";
598 
599  while (feature.values.size() < 10) {
600  if (m_t[ci->second] > 0) {
601  feature.values.push_back(m_t[ci->second]);
602  } else {
603  feature.values.push_back(lag2tempo(ci->second));
604  }
605  if (ci == candidates.begin()) break;
606  --ci;
607  }
608 
609  fs[CandidatesOutput].push_back(feature);
610 
611  return fs;
612 }
613 
614 
615 
617  Plugin(inputSampleRate),
618  m_d(new D(inputSampleRate))
619 {
620 }
621 
623 {
624  delete m_d;
625 }
626 
627 string
629 {
630  return "fixedtempo";
631 }
632 
633 string
635 {
636  return "Simple Fixed Tempo Estimator";
637 }
638 
639 string
641 {
642  return "Study a short section of audio and estimate its tempo, assuming the tempo is constant";
643 }
644 
645 string
647 {
648  return "Vamp SDK Example Plugins";
649 }
650 
651 int
653 {
654  return 1;
655 }
656 
657 string
659 {
660  return "Code copyright 2008 Queen Mary, University of London. Freely redistributable (BSD license)";
661 }
662 
663 size_t
665 {
666  return m_d->getPreferredStepSize();
667 }
668 
669 size_t
671 {
672  return m_d->getPreferredBlockSize();
673 }
674 
675 bool
676 FixedTempoEstimator::initialise(size_t channels, size_t stepSize, size_t blockSize)
677 {
678  if (channels < getMinChannelCount() ||
679  channels > getMaxChannelCount()) return false;
680 
681  return m_d->initialise(channels, stepSize, blockSize);
682 }
683 
684 void
686 {
687  return m_d->reset();
688 }
689 
692 {
693  return m_d->getParameterDescriptors();
694 }
695 
696 float
698 {
699  return m_d->getParameter(id);
700 }
701 
702 void
703 FixedTempoEstimator::setParameter(std::string id, float value)
704 {
705  m_d->setParameter(id, value);
706 }
707 
710 {
711  return m_d->getOutputDescriptors();
712 }
713 
715 FixedTempoEstimator::process(const float *const *inputBuffers, RealTime ts)
716 {
717  return m_d->process(inputBuffers, ts);
718 }
719 
722 {
723  return m_d->getRemainingFeatures();
724 }
std::vector< OutputDescriptor > OutputList
bool hasDuration
True if the returned results for this output are known to have a duration field.
ParameterList getParameterDescriptors() const
static int ACFOutput
std::string label
Label for the sample of this feature.
FeatureSet process(const float *const *inputBuffers, Vamp::RealTime timestamp)
Process a single block of input data.
float sampleRate
Sample rate of the output results, as samples per second.
size_t getPreferredStepSize() const
bool hasFixedBinCount
True if the output has the same number of values per sample for every output sample.
std::vector< float > values
Results for a single sample of this feature.
Results are evenly spaced in time (sampleRate specified below)
std::map< int, FeatureList > FeatureSet
float quantizeStep
Quantization resolution of the output values (e.g. 1.0 if they are all integers).
RealTime timestamp
Timestamp of the output feature.
std::string getName() const
Get a human-readable name or title of the plugin.
std::string identifier
The name of the parameter, in computer-usable form.
std::string description
A human-readable short text describing the output.
Plugin(float inputSampleRate)
void reset()
Reset the plugin after use, to prepare it for another clean run.
std::string getDescription() const
Get a human-readable description for the plugin, typically a line of text that may optionally be disp...
std::string identifier
The name of the output, in computer-usable form.
float minValue
Minimum value of the results in the output.
float getParameter(std::string id) const
Get the value of a named parameter.
OutputList getOutputDescriptors() const
std::string getIdentifier() const
Get the computer-usable name of the plugin.
std::string name
The human-readable name of the parameter.
float maxValue
Maximum value of the results in the output.
static int DFOutput
size_t getPreferredBlockSize() const
Get the preferred block size (window size – the number of sample frames passed in each block to the ...
float m_inputSampleRate
RealTime duration
Duration of the output feature.
float minValue
The minimum value of the parameter.
std::string getMaker() const
Get the name of the author or vendor of the plugin in human-readable form.
float getParameter(string id) const
std::string unit
The unit of the parameter, in human-readable form.
std::string unit
The unit of the output, in human-readable form.
std::string name
The human-readable name of the output.
RealTime represents time values to nanosecond precision with accurate arithmetic and frame-rate conversion functions.
bool hasTimestamp
True if an output feature has its own timestamp.
FeatureSet getRemainingFeatures()
After all blocks have been processed, calculate and return any remaining features derived from the complete input.
void setParameter(std::string id, float value)
Set a named parameter.
std::string description
A human-readable short text describing the parameter.
float maxValue
The maximum value of the parameter.
static int CandidatesOutput
ParameterList getParameterDescriptors() const
Get the controllable parameters of this plugin.
FeatureSet process(const float *const *, RealTime)
bool hasDuration
True if an output feature has a specified duration.
std::string getCopyright() const
Get the copyright statement or licensing summary for the plugin.
size_t getPreferredBlockSize() const
virtual size_t getMaxChannelCount() const
Get the maximum supported number of input channels.
static int FilteredACFOutput
size_t binCount
The number of values per result of the output.
void setParameter(string id, float value)
size_t getPreferredStepSize() const
Get the preferred step size (window increment – the distance in sample frames between the start fram...
bool initialise(size_t channels, size_t stepSize, size_t blockSize)
Initialise a plugin to prepare it for use with the given number of input channels, step size (window increment, in sample frames) and block size (window size, in sample frames).
bool initialise(size_t channels, size_t stepSize, size_t blockSize)
bool isQuantized
True if the output values are quantized to a particular resolution.
static int TempoOutput
float defaultValue
The default value of the parameter.
OutputList getOutputDescriptors() const
Get the outputs of this plugin.
virtual size_t getMinChannelCount() const
Get the minimum supported number of input channels.
int getPluginVersion() const
Get the version number of the plugin.
bool isQuantized
True if the parameter values are quantized to a particular resolution.
FixedTempoEstimator(float inputSampleRate)
SampleType sampleType
Positioning in time of the output results.
D(float inputSampleRate)
Results are unevenly spaced and have individual timestamps.
bool hasKnownExtents
True if the results in each output bin fall within a fixed numeric range (minimum and maximum values).
std::vector< ParameterDescriptor > ParameterList