Commit 734dba57 authored by Tomas Rokos

Add improved dtw, methods for loading learned model, improve frontend

parent 15f0a7e4
from abc import ABC, abstractmethod
from audio_classification.classifier.util import split_mfcc
from audio_classification.preprocess import preprocess_sample
class BaseClassifier(ABC):
    MFCC_LEN = 1296

    def __init__(
            self,
            **kwargs  # arguments for preprocessing
    ):
        self.preprocess_kwargs = kwargs

    # Should classify an MFCC matrix of shape (num_coefficients, MFCC_LEN=1296).
    @abstractmethod
    def classify(self, mfcc):
        pass

    def classify_file(self, path):
        return self.classify(preprocess_sample(path, **self.preprocess_kwargs))

    # Can classify an MFCC of variable length: split it into fixed-size
    # windows, classify each, and majority-vote the results.
    def classify_mfcc(self, mfcc):
        mfccs = split_mfcc(mfcc, self.MFCC_LEN)
        result = list(map(lambda x: self.classify(x), mfccs))
        return max(set(result), key=result.count)

    def fit(self, X, y, validation_data):
        pass


class MockClassifier(BaseClassifier):
    def classify(self, df):
......
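The majority vote in classify_mfcc picks the genre predicted for the most windows. A minimal illustration of the voting expression:

# Ties resolve arbitrarily; with a clear majority the winner is stable.
result = ['rock', 'rock', 'metal']
print(max(set(result), key=result.count))  # 'rock'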
import numpy as np
def split_mfcc(mfcc, frame_length):
    hop_length = frame_length // 2
    mfcc_len = mfcc.shape[1]
    if mfcc_len < frame_length:
        raise Exception('Audio file too short')
    if mfcc_len == frame_length:
        return np.array([mfcc])
    mfcc_t = mfcc.T
    mfccs = []
    n_frames = 1 + (mfcc_len - frame_length) // hop_length
    for i in range(n_frames):
        start = i * hop_length
        end = start + frame_length
        mfccs.append(mfcc_t[start:end].T)
    # ranks = [1 / 2] + [1] * (n_frames - 1)
    rest = mfcc_len % hop_length
    if rest != 0:
        start = mfcc_len - frame_length
        mfccs.append(mfcc_t[start:].T)
        # ranks.append(rest / frame_length)
    return np.array(mfccs)
\ No newline at end of file
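A quick shape check for split_mfcc (illustrative values only):

import numpy as np

mfcc = np.zeros((13, 3000))                    # 13 coefficients, 3000 frames
windows = split_mfcc(mfcc, frame_length=1296)
# hop 648 gives 3 full windows, plus one tail window ending at frame 3000
print(windows.shape)                           # (4, 13, 1296)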
from keras import models, layers
import pandas as pd
from audio_classification.classifier.base_classifier import BaseClassifier
from audio_classification.util.transform import decode_genre
class DNNSimpleClassifier(BaseClassifier):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.model = model = models.Sequential()
        model.add(layers.Dense(1024, activation='relu', input_shape=(13,)))
        model.add(layers.Dense(512, activation='relu'))
        model.add(layers.Dense(128, activation='relu'))
        model.add(layers.Dense(10, activation='softmax'))
        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

    def load(self):
        self.model = models.load_model('audio_classification/dnn_simple_classifier/saved_model')

    @staticmethod
    def __transform_mfcc(mfcc):
        # Collapse the time axis into one 13-dimensional mean-MFCC vector.
        return pd.DataFrame(mfcc.T.mean(axis=0)).T

    def classify(self, mfcc):
        mfcc = self.__transform_mfcc(mfcc)
        res = self.model.predict_classes(mfcc)
        return decode_genre(res[0])

    def fit(self, X, y, validation_data):
        history = self.model.fit(X, y, epochs=100, batch_size=16, validation_data=validation_data)
        self.model.save('audio_classification/dnn_simple_classifier/saved_model')
        return history


class DNNSimpleClassifierFactory:
    @staticmethod
    def create():
        cls = DNNSimpleClassifier()
        cls.load()
        return cls
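Note that Sequential.predict_classes was removed from Keras around TensorFlow 2.6, so classify above only runs against the TensorFlow version pinned in requirements.txt. A minimal sketch of an equivalent for newer versions, assuming the softmax output above (predict_class_compat is a hypothetical helper, not part of this commit):

import numpy as np

def predict_class_compat(model, x):
    # argmax over the softmax probabilities reproduces predict_classes
    return np.argmax(model.predict(x), axis=-1)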
import numpy as np
def minkowski(v1, v2, pow_n):
    diff = np.absolute(np.subtract(v1, v2))
    return np.sum(np.power(diff, pow_n)) ** (1 / pow_n)


def euclid(v1, v2):
    return minkowski(v1, v2, pow_n=2)
\ No newline at end of file
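A quick check that pow_n=2 reduces to the familiar Euclidean distance:

print(minkowski([0, 0], [3, 4], pow_n=2))  # 5.0
print(euclid([0, 0], [3, 4]))              # 5.0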
import numpy as np

from audio_classification.dtwclassifier.distances import euclid
from audio_classification.dtwclassifier.ranges import default_range


def dtw(v1, v2, dist_fn=euclid, range_fn=default_range):
    v1len, v2len = len(v1), len(v2)
    # Accumulated-cost matrix with a leading row/column of infinities,
    # so out-of-range neighbours never win the min below.
    mat_rows, mat_cols = v1len + 1, v2len + 1
    mat = np.full((mat_rows, mat_cols), np.inf)
    mat[0, 0] = 0
    # range_fn yields the (i, j) cells to evaluate: the full grid by
    # default, or a band/parallelogram for the constrained variants.
    for (i, j) in range_fn(v1len, v2len):
        cost = dist_fn(v1[i], v2[j])
        min_three = np.min([mat[i + 1, j], mat[i, j + 1], mat[i, j]])
        mat[i + 1, j + 1] = cost + min_three
    return mat[v1len, v2len], mat[1:, 1:]
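A small worked example of the new signature; the second return value is the accumulated-cost matrix without its padding row and column:

import numpy as np

v1 = np.array([0.0, 1.0, 2.0])
v2 = np.array([0.0, 1.0, 1.0, 2.0])
cost, acc = dtw(v1, v2)
print(cost)       # 0.0; the repeated 1.0 in v2 is absorbed by the warping
print(acc.shape)  # (3, 4)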
@@ -25,11 +25,11 @@ class DTWClassifier(BaseClassifier):
    MODE_MEAN = 'MEAN'
    MODE_MIN = 'MIN'

    def __init__(self, mode=MODE_MEAN, n_compared_songs=10, dist='euclidean'):
        self.mode = mode
        self.n_compared_songs = n_compared_songs
        self.dist = dist
        self.traindf = None

    def __get_mfcc_agg_func(self):
        if self.mode == self.MODE_MEAN:
@@ -38,12 +38,27 @@ class DTWClassifier(BaseClassifier):
            return lambda mfccs, grouped_df: min(mfccs)
        raise Exception("Bad mode specified.")

    def classify(self, mfcc):
        return self.__classify_mfcc(mfcc)

    def classify_multiple(self, testdf):
        result = testdf.iloc[:, 0].apply(lambda x: self.__classify_mfcc(np.load(x)))
        return pd.DataFrame(result)

    def __classify_mfcc(self, mfcc):
        result = self.traindf.groupby('label').apply(
            lambda x: aggregate_genre(x, mfcc, self.__get_mfcc_agg_func(), self.n_compared_songs, self.dist)
        )
        return result.idxmin()

    def fit(self, X, y, _=None):
        self.traindf = pd.merge(X, y, left_index=True, right_index=True)


class DTWClassifierFactory:
    @staticmethod
    def create():
        cls = DTWClassifier()
        df = pd.read_csv('preprocessed_nmfcc_13/data.csv', header=None, names=['file', 'label'])
        cls.fit(df['file'], df['label'])
        return cls
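A sketch of the intended use (classify_file is inherited from BaseClassifier; 'some_song.wav' is a hypothetical input):

clf = DTWClassifierFactory.create()
print(clf.classify_file('some_song.wav'))  # e.g. 'blues'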
from itertools import product


def default_range(n, m):
    # Every cell of the n x m grid, in row-major order.
    return list(product(range(0, n), range(0, m)))


def sakoe_chiba_range(n, m, window_size=2):
    res = []
    for w in range(1, window_size):
        for i in range(0, min(n, m) - w):
            res.append((i + w, i))
        for i in range(0, min(n, m) - w):
            res.append((i, i + w))
    for i in range(0, min(n, m)):
        res.append((i, i))
    # Sort into row-major order so each cell is evaluated only after the
    # neighbours its DTW recurrence reads from; the raw append order would
    # visit off-diagonal cells before the diagonal ones they depend on.
    return sorted(res)


def itakura_constraint(i, j, n, m, slope):
    return slope * i > j > m - 1 - slope * (n - i) and slope * j >= i >= n - 1 - slope * (m - j)


def itakura_range(n, m, s=2):
    res = []
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            if itakura_constraint(i, j, n, m, s):
                res.append((i - 1, j - 1))
    return res
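How many DP cells each constraint keeps on a 5x5 grid:

print(len(default_range(5, 5)))      # 25: the full grid
print(len(sakoe_chiba_range(5, 5)))  # 13: a band of width 1 around the diagonal
print(sakoe_chiba_range(5, 5)[:5])   # [(0, 0), (0, 1), (1, 0), (1, 1), (1, 2)]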
import os
from random import random

import librosa
from flask import Flask, request, flash, jsonify, send_from_directory, make_response
from werkzeug.utils import redirect, secure_filename

from audio_classification.classifier.base_classifier import MockClassifier
from audio_classification.dnn_simple_classifier.dnn_simple_classifier import DNNSimpleClassifierFactory
from audio_classification.dtwclassifier.dtwclassifier import DTWClassifierFactory
from audio_classification.run.utils import generate_soundwave

UPLOAD_FOLDER = 'upload'
app = Flask(__name__, static_url_path='', static_folder='static')

classifiers = {
    'mock': MockClassifier()
}
classifier = classifiers['mock']

if not os.path.exists(UPLOAD_FOLDER):
    os.mkdir(UPLOAD_FOLDER)
@@ -38,18 +35,6 @@ def classify():
    if request.method == "OPTIONS":  # CORS preflight
        return _build_cors_prelight_response()

    if 'file' not in request.files:
        flash('No file part')
        return redirect(request.url)
@@ -62,9 +47,23 @@ def classify():
    try:
        os.mkdir(folder)
        file.save(filepath)
        audio_file, sr = librosa.load(filepath)
        mfcc = librosa.feature.mfcc(y=audio_file, sr=sr, n_mfcc=13)
        simple_dnn_classifier = DNNSimpleClassifierFactory.create()
        dtw_classifier = DTWClassifierFactory.create()
        return jsonify({
            "classification": [
                {
                    'name': 'Simple DNN Classifier',
                    'genre': simple_dnn_classifier.classify_mfcc(mfcc)
                },
                {
                    'name': 'DTW classifier',
                    'genre': dtw_classifier.classify_mfcc(mfcc)
                }
            ],
            "dtw": dtw_classifier.classify_mfcc(mfcc),
            "filename": file.filename,
            "soundwave": generate_soundwave(filepath)
        })
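The endpoint can be exercised with a small client. A sketch: the port and field name come from the frontend bundle's XHR-upload config below, and sample.wav is a placeholder path:

import requests

with open('sample.wav', 'rb') as f:
    resp = requests.post('http://localhost:4666/classify', files={'file': f})
print(resp.json()['classification'])
# e.g. [{'name': 'Simple DNN Classifier', 'genre': 'jazz'}, ...]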
......
import React, {useEffect, useRef} from 'react';
import {base64tosrc} from "./utils";

interface Classification {
    name: string;
    genre: string;
}

export interface APIResponse {
    filename: string;
    soundwave: string;
    classification: Classification[];
}

interface ResultProps {
@@ -25,6 +31,12 @@ const Result: React.FC<ResultProps> = ({response, file}) => {
                <source src={file}/>
            </audio>
            <img src={base64tosrc(response.soundwave)} alt="Sound wave"/>
            {response.classification.map(({name, genre}) => (
                <div key={name}>
                    <h1>{name}</h1>
                    <p>{genre}</p>
                </div>
            ))}
        </div>
    )
}
......
@@ -224,8 +224,8 @@ var e,t;function r(e){if(void 0===e)throw new ReferenceError("this hasn't been i
},{"./cjs/react-dom.production.min.js":"i0Gi"}],"u7YK":[function(require,module,exports) {
"use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.base64tosrc=void 0;var e=function(e){return"data:image/jpg;base64,"+e};exports.base64tosrc=e;
},{}],"TwpU":[function(require,module,exports) {
"use strict";var e=this&&this.__createBinding||(Object.create?function(e,t,r,u){void 0===u&&(u=r),Object.defineProperty(e,u,{enumerable:!0,get:function(){return t[r]}})}:function(e,t,r,u){void 0===u&&(u=r),e[u]=t[r]}),t=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),r=this&&this.__importStar||function(r){if(r&&r.__esModule)return r;var u={};if(null!=r)for(var n in r)"default"!==n&&Object.prototype.hasOwnProperty.call(r,n)&&e(u,r,n);return t(u,r),u};Object.defineProperty(exports,"__esModule",{value:!0});var u=r(require("react")),n=require("./utils"),a=function(e){var t=e.response,r=e.file,a=u.useRef();return u.useEffect(function(){a.current.pause(),a.current.load()},[r]),u.default.createElement("div",null,u.default.createElement("h1",null,t.filename),u.default.createElement("audio",{controls:!0,style:{width:"100%"},ref:a},u.default.createElement("source",{src:r})),u.default.createElement("img",{src:n.base64tosrc(t.soundwave),alt:"Sound wave"}))};exports.default=a;
"use strict";var e=this&&this.__createBinding||(Object.create?function(e,t,r,n){void 0===n&&(n=r),Object.defineProperty(e,n,{enumerable:!0,get:function(){return t[r]}})}:function(e,t,r,n){void 0===n&&(n=r),e[n]=t[r]}),t=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),r=this&&this.__importStar||function(r){if(r&&r.__esModule)return r;var n={};if(null!=r)for(var u in r)"default"!==u&&Object.prototype.hasOwnProperty.call(r,u)&&e(n,r,u);return t(n,r),n};Object.defineProperty(exports,"__esModule",{value:!0});var n=r(require("react")),u=require("./utils"),a=function(e){var t=e.response,r=e.file,a=n.useRef();return n.useEffect(function(){a.current.pause(),a.current.load()},[r]),n.default.createElement("div",null,n.default.createElement("h1",null,t.filename),n.default.createElement("audio",{controls:!0,style:{width:"100%"},ref:a},n.default.createElement("source",{src:r})),n.default.createElement("img",{src:u.base64tosrc(t.soundwave),alt:"Sound wave"}),t.classification.map(function(e){var t=e.name,r=e.genre;return n.default.createElement("div",{key:t},n.default.createElement("h1",null,t),n.default.createElement("p",null,r))}))};exports.default=a;
},{"react":"SAdv","./utils":"u7YK"}],"zo2T":[function(require,module,exports) {
"use strict";var e=this&&this.__importDefault||function(e){return e&&e.__esModule?e:{default:e}};Object.defineProperty(exports,"__esModule",{value:!0}),require("./index.css"),require("@uppy/core/dist/style.min.css"),require("@uppy/dashboard/dist/style.css");var t=require("@uppy/core"),r=e(require("@uppy/dashboard")),i=e(require("@uppy/xhr-upload")),u=e(require("react-dom")),s=e(require("react")),l=e(require("./ts/Result")),d=require("./ts/utils"),o=new t.Uppy({debug:!0,allowMultipleUploads:!0,autoProceed:!0,restrictions:{allowedFileTypes:[".wav"]}}).use(r.default,{inline:!0,target:"#uploader",height:"100%",width:"100%",theme:"light"}).use(i.default,{method:"POST",endpoint:"http://localhost:4666/classify",fieldName:"file"});o.on("file-added",function(e){o.getFiles().filter(function(t){return e.id!=t.id}).forEach(function(e){return o.removeFile(e.id)})}),o.on("upload-success",function(e,t){var r=document.getElementById("result");if(!r)throw"No res element!";var i=t.body;o.setFileState(e.id,{preview:d.base64tosrc(i.soundwave)}),u.default.render(s.default.createElement(l.default,{response:i,file:URL.createObjectURL(e.data)}),r)}),o.on("complete",function(e){o.getFiles()[0].id});
},{"./index.css":"vKFU","@uppy/core/dist/style.min.css":"vKFU","@uppy/dashboard/dist/style.css":"vKFU","@uppy/core":"Fa4j","@uppy/dashboard":"fMd0","@uppy/xhr-upload":"hnV8","react-dom":"CSY6","react":"SAdv","./ts/Result":"TwpU","./ts/utils":"u7YK"}]},{},["zo2T"], null)
//# sourceMappingURL=/frontend.6af4dc52.js.map
\ No newline at end of file
<html lang="en"><head><title>Audio classification</title><link rel="stylesheet" href="/frontend.56c3c255.css"></head><body> <div class="container"> <h1>Audio Genre Classificator</h1> <div id="uploader" class="dropzone"></div> <div id="result"></div> </div> <script src="/frontend.6af4dc52.js"></script> </body></html>
\ No newline at end of file
from sklearn.preprocessing import OrdinalEncoder

genre_encoding = {
    'blues': 0,
    'classical': 1,
    'country': 2,
    'disco': 3,
    'hiphop': 4,
    'jazz': 5,
    'metal': 6,
    'pop': 7,
    'reggae': 8,
    'rock': 9
}


def encode_genre(genre):
    return genre_encoding[genre]


def decode_genre(given_encoded_genre):
    for genre, encoded_genre in genre_encoding.items():
        if given_encoded_genre == encoded_genre:
            return genre


genre_encoder = OrdinalEncoder().fit(list(genre_encoding.items()))
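A round-trip sanity check of the two lookup helpers:

assert encode_genre('jazz') == 5
assert decode_genre(5) == 'jazz'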
@@ -44690,30 +44690,73 @@ exports.base64tosrc = base64tosrc;
},{}],"ts/Result.tsx":[function(require,module,exports) {
"use strict";
var __createBinding = this && this.__createBinding || (Object.create ? function (o, m, k, k2) {
  if (k2 === undefined) k2 = k;
  Object.defineProperty(o, k2, {
    enumerable: true,
    get: function get() {
      return m[k];
    }
  });
} : function (o, m, k, k2) {
  if (k2 === undefined) k2 = k;
  o[k2] = m[k];
});

var __setModuleDefault = this && this.__setModuleDefault || (Object.create ? function (o, v) {
  Object.defineProperty(o, "default", {
    enumerable: true,
    value: v
  });
} : function (o, v) {
  o["default"] = v;
});

var __importStar = this && this.__importStar || function (mod) {
  if (mod && mod.__esModule) return mod;
  var result = {};
  if (mod != null) for (var k in mod) {
    if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
  }
  __setModuleDefault(result, mod);
  return result;
};

Object.defineProperty(exports, "__esModule", {
  value: true
});
var react_1 = __importStar(require("react"));
var utils_1 = require("./utils");
var Result = function Result(_ref) {
  var response = _ref.response,
      file = _ref.file;
  var audio = react_1.useRef();
  react_1.useEffect(function () {
    audio.current.pause();
    audio.current.load();
  }, [file]);
  return react_1.default.createElement("div", null, react_1.default.createElement("h1", null, response.filename), react_1.default.createElement("audio", {
    controls: true,
    style: {
      'width': '100%'
    },
    ref: audio
  }, react_1.default.createElement("source", {
    src: file
  })), react_1.default.createElement("img", {
    src: utils_1.base64tosrc(response.soundwave),
    alt: "Sound wave"
  }), response.classification.map(function (_ref2) {
    var name = _ref2.name,
        genre = _ref2.genre;
    return react_1.default.createElement("div", {
      key: name
    }, react_1.default.createElement("h1", null, name), react_1.default.createElement("p", null, genre));
  }));
};
@@ -44755,10 +44798,8 @@ var uppy = new core_1.Uppy({
  debug: true,
  allowMultipleUploads: true,
  autoProceed: true,
  restrictions: {
    allowedFileTypes: ['.wav']
  }
}).use(dashboard_1.default, {
  inline: true,
@@ -44795,8 +44836,6 @@ uppy.on('file-added', function (file) {
  });
});

uppy.on('upload-success', function (file, response) {
  var resElement = document.getElementById('result');

  if (!resElement) {
@@ -44813,12 +44852,7 @@ uppy.on('upload-success', function (file, response) {
}), resElement);
});
uppy.on('complete', function (result) {
  var id = uppy.getFiles()[0].id;
});
},{"./index.css":"index.css","@uppy/core/dist/style.min.css":"../../../node_modules/@uppy/core/dist/style.min.css","@uppy/dashboard/dist/style.css":"../../../node_modules/@uppy/dashboard/dist/style.css","@uppy/core":"../../../node_modules/@uppy/core/lib/index.js","@uppy/dashboard":"../../../node_modules/@uppy/dashboard/lib/index.js","@uppy/xhr-upload":"../../../node_modules/@uppy/xhr-upload/lib/index.js","react-dom":"../../../node_modules/react-dom/index.js","react":"../../../node_modules/react/index.js","./ts/Result":"ts/Result.tsx","./ts/utils":"ts/utils.ts"}],"../../../../../../../../usr/local/lib/node_modules/parcel-bundler/src/builtins/hmr-runtime.js":[function(require,module,exports) {
var global = arguments[3];
@@ -44848,7 +44882,7 @@ var parent = module.bundle.parent;
if ((!parent || !parent.isParcelRequire) && typeof WebSocket !== 'undefined') {
  var hostname = "" || location.hostname;
  var protocol = location.protocol === 'https:' ? 'wss' : 'ws';
  var ws = new WebSocket(protocol + '://' + hostname + ':' + "54267" + '/');

  ws.onmessage = function (event) {
    checkedAssets = {};
@@ -5,7 +5,7 @@ argon2-cffi==20.1.0
astunparse==1.6.3
async-generator==1.10
attrs==20.2.0
-e git+https://gitlab.fit.cvut.cz/rudolja4/ni-vmm-music-genre-classification.git@15f0a7e42fcc605c8c8262ccbfa3423edc21130f#egg=audio_classification
audioread==2.1.9
backcall==0.2.0
bleach==3.2.1
......