Commit 4ade09aa authored by Jan Rudolf's avatar Jan Rudolf
Browse files

ADD similarity to preprocessing

parent f772773b
......@@ -20,6 +20,7 @@ def preprocess_sample(path, **kwargs):
def preprocess(n_mfcc):
output_folder = f'preprocessed_nmfcc_{n_mfcc}'
output_file = os.path.join(output_folder, 'data.csv')
similarity_file = os.path.join(output_folder, 'similarity.pyc')
columns = ['file', 'label']
df = pd.read_csv(DATASET_FILE, sep='\t', header=None, names=columns)
......@@ -54,11 +55,20 @@ def preprocess(n_mfcc):
done += 1
print(f'Preprocessing <{done}/{length}> ', end='\r')
output_df = pd.DataFrame(output_index, columns=columns, index=None)
output_df.to_csv(output_file, header=False, index=False)
similarity_list = list()
for i in range(len(output_index)):
for j in range(i, len(output_index)):
similarity_list.append((output_index[j][0], output_index[i][0], output_index[i][1] == output_index[j][1]))
similarity_df = pd.DataFrame(similarity_list, columns=['file_1', 'file_2', 'label'], index=None)
similarity_df.to_pickle(similarity_file, protocol=4)
if __name__ == '__main__':
