evaluate.py 5.55 KB
Newer Older
Martin Lank's avatar
Martin Lank committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import sys
from datetime import datetime
from pathlib import Path

import os

from model_definitions import get_model
from utils import *

# Command-line arguments without the script name; at least one is required.
arguments = sys.argv[1:]
if not arguments:
    script_name = sys.argv[0]
    print("Usage to evaluate all: {} --all".format(script_name))
    print("Usage to evaluate exact experiment: {} <experiment_number> [-d <dataset>]".format(script_name))
    sys.exit(1)

# True when every experiment found on disk is evaluated instead of a single one.
eval_all = arguments[0] == "--all"

# Timestamp that keeps the names of successive evaluation reports distinct.
date_string = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
if not eval_all:
    try:
        experiment_number_to_eval = int(arguments[0])
    except ValueError:
        # Anything other than "--all" has to be an integer experiment number;
        # fail with a readable message instead of a traceback.
        print("Invalid experiment number: " + arguments[0])
        sys.exit(1)

    exp_params = list(get_model(experiment_number_to_eval))

    # get_model() may omit trailing settings. Fill the missing ones with
    # defaults, in order: dataset, color space, validation split, class weights.
    trailing_defaults = [DATASET_DEFAULT, "rgb", 0.25, False]
    if len(exp_params) >= 2:
        exp_params.extend(trailing_defaults[len(exp_params) - 2:])

    model, model_optimizer, dataset, color_space, val_split, use_class_weights = exp_params

    output_name = "eval_" + date_string + "_exp_" + str(experiment_number_to_eval)
else:
    # Evaluating everything: no single experiment supplies the settings, so
    # the default dataset, split and color space are used.
    experiment_number_to_eval = None
    output_name = "eval_" + date_string + "_exp_all"
    dataset = DATASET_DEFAULT
    val_split = 0.25
    color_space = "rgb"

# An optional "-d <dataset>" after the first argument replaces the dataset
# selected above. Explicit length checks are used instead of catching every
# exception, so unrelated errors (and Ctrl-C) are no longer swallowed.
if len(arguments) > 1:
    if arguments[1] == "-d":
        if len(arguments) > 2:
            dataset = arguments[2]
            print("Overriding associated dataset with:", dataset)
        else:
            # "-d" was given without a value, so keep the current dataset.
            print("Specified dataset not found, continuing with:", dataset)
else:
    print("Using dataset: ", dataset)

# Load the selected dataset with the split and color space chosen above.
dataset_parts = load_data(
    dataset,
    batch_size=50,
    cache_prefetch=False,
    validation_split=val_split,
    colorspace=color_space,
)
train_ds, val_ds, class_names, train_len, val_len, class_weights = dataset_parts

# Collect the labels of every element yielded by val_ds into one array.
# NOTE(review): the predictions computed later iterate val_ds again, so this
# assumes val_ds yields its elements in the same order on every pass - confirm.
y_val = np.concatenate([labels for _, labels in val_ds], axis=0)

def _overall_row_label(report_df):
    """Return the row label that holds the overall score of a report.

    classification_report() stores the overall score under 'accuracy', or
    under 'micro avg' when the requested labels do not match the labels
    present in the data (e.g. a class without any samples or predictions).
    """
    return 'accuracy' if 'accuracy' in report_df.index else 'micro avg'


# One row per (experiment, top_k):
# (exp, dataset, top_k, val_split, accuracy, per-class F1 ..., overall F1)
acc_all = []

for path in Path("./").glob('**/cp_best.ckpt.index'):
    exp_folder = str(path.parents[0])

    # The experiment number is the text between the first and the second
    # underscore of the folder path; folders that do not match are skipped.
    try:
        exp_number = int(exp_folder.split("_")[1])
    except (IndexError, ValueError):
        continue
    if not eval_all and exp_number != experiment_number_to_eval:
        continue

    model = load_architecture(exp_folder, exp_number)
    model.load_weights(exp_folder + '/cp_best.ckpt')

    # Class indices sorted by ascending score: the last column is the most
    # likely class, the second to last the runner-up.
    idx = np.argsort(model.predict(val_ds), axis=-1)
    top_1 = idx[:, -1]
    top_2 = idx[:, -2]

    # Statistics over the misclassified samples only. The explicit copy keeps
    # the column assignments below from targeting a view of the full frame.
    spread_df = pd.DataFrame({'y': y_val, 'top1': top_1, 'top2': top_2})
    spread_df = spread_df[spread_df.y != spread_df.top1].copy()
    spread_df['spread'] = np.abs(spread_df.top1 - spread_df.top2)
    spread_df['dir'] = spread_df.y.astype(str) + ":" + spread_df.top1.astype(str) + "->" + spread_df.top2.astype(str)
    print("Spread:")
    print(spread_df.dir.value_counts())
    print(spread_df.spread.value_counts())
    print("Top-k spread:", spread_df.describe())

    # Labels are taken to be the indices into class_names.
    num_of_classes = len(class_names)
    labels = list(range(num_of_classes))

    report_dict = classification_report(y_val, top_1, target_names=class_names, output_dict=True, labels=labels)
    # Top-2 prediction: counts as correct when the runner-up is the true
    # class, otherwise it falls back to the top-1 prediction.
    top_2_adj = np.where(top_2 == y_val, top_2, top_1)
    report_top_2_dict = classification_report(y_val, top_2_adj, target_names=class_names, output_dict=True, labels=labels)
    df = pd.DataFrame(report_dict).transpose()
    df2 = pd.DataFrame(report_top_2_dict).transpose()

    # Each report is inspected on its own: the two reports may use different
    # overall-score rows, and both rows must always get their overall F1.
    overall_1 = _overall_row_label(df)
    overall_2 = _overall_row_label(df2)

    top1_acc = df.loc[overall_1, 'precision']
    top2_acc = df2.loc[overall_2, 'precision']
    print("Accuracy exp:", exp_number, top1_acc)
    print("Top-2 Accuracy exp:", exp_number, top2_acc)

    # Per-class F1 scores followed by the overall F1 score.
    f_scores = [df.loc[name, 'f1-score'] for name in class_names]
    f_scores2 = [df2.loc[name, 'f1-score'] for name in class_names]
    f_scores.append(df.loc[overall_1, 'f1-score'])
    f_scores2.append(df2.loc[overall_2, 'f1-score'])

    acc_all.append((exp_number, dataset, 1, val_split, top1_acc, *f_scores))
    acc_all.append((exp_number, dataset, 2, val_split, top2_acc, *f_scores2))

# Every row is (exp, dataset, top_k, val_split, val_acc, per-class F1 ...,
# overall F1); the widest row decides how many per-class columns are named.
col_names = ["exp", "dataset", "top_k", "val_split", "val_acc"]
num_of_cols = max((len(row) for row in acc_all), default=0)
num_of_f1_cols = num_of_cols - len(col_names) - 1
col_names.extend("f1_" + str(i + 1) for i in range(num_of_f1_cols))
col_names.append("f1_all")

df_all = pd.DataFrame(acc_all, columns=col_names)
df_all_sorted = df_all.sort_values('val_acc')
print(df_all_sorted)

# Create the output folder without a separate existence check, so a folder
# created concurrently (or already present) does not cause a failure.
DIR = "evals"
os.makedirs(DIR, exist_ok=True)

df_all_sorted.to_csv(DIR + "/" + output_name + ".csv", sep=";", index=False)