Metrics and model comparison (APIs)
VIANOPS performance metrics help you to monitor models and evaluate their operation and accuracy.
Performance drift policies calculate configured metrics each time they run. To get the results of performance drift policies, use v1/performance/search (REST API) or vianops_client.models.riskstore.performance.V1PerformanceSearch (SDK).
For example, the following dict is returned for a classification model. These metrics are calculated as a result of running the policy ModP Policy 6m Day with accuracy defined as the metric of interest.
{
"items": [
{
"status": "Success",
"key": "",
"job_id": "modelperformance-queue-988463",
"deployment": "Credit Card Fraud1",
"model_name": "Credit Card Fraud1",
"model_version": "1",
"model_stage": "primary",
"hostname": "modelperformance-queue-988463-gnmjc",
"feature_set_name": null,
"experiment_type": "classification",
"policy_name": "ModP Policy 6m Day",
"policy_uuid": "4e99cbb1-1270-451c-8244-5f9aac0cc6c9",
"segment_id": null,
"model_uuid": "d0b0036c-625a-42d0-84fd-6824d9cf53d3",
"results": {
"baseline_metrics": {
"False": {
"accuracy": 0.9756355921868267,
"balanced_accuracy": 0.7553191429464344,
"bm": 0.5106382858928689,
"dor": 2043474.1262746586,
"f1": 0.9873417716085074,
"fdr": 0.024999999972826085,
"fm": 0.9874208818195314,
"fn": 0.0,
"fnr": 0.0,
"for": 0.0,
"fp": 23.0,
"fpr": 0.4893616917157087,
"lift": 20.085106382978722,
"logloss": 0.3773188693195634,
"mcc": 3.4567222585378587,
"mk": 0.9749999572735524,
"modelgini": 0.5106382978723403,
"nlr": 0.0,
"npv": 0.999999958333335,
"plr": 2.0434741262746585,
"precision": 0.9749999989402174,
"pt": 0.41160660726983567,
"recall": 0.9999999988851728,
"rnp": 0.025423728786627405,
"rocauc": 0.7553191489361701,
"specificity": 0.5106382870076961,
"tn": 24.0,
"tp": 897.0,
"ts": 0.9749999989402174
},
"True": {
"accuracy": 0.9756355921868267,
"balanced_accuracy": 0.7553191429464344,
"bm": 0.5106382858928689,
"dor": 1043477.1708594387,
"f1": 0.676056328506249,
"fdr": 0.0,
"fm": 0.7145895785211968,
"fn": 23.0,
"fnr": 0.4893616917157087,
"for": 0.024999999972826085,
"fp": 0.0,
"fpr": 0.0,
"lift": 1.0260869565217392,
"logloss": 0.3773188693195634,
"mcc": 21.392205416433633,
"mk": 0.9749999572735524,
"modelgini": 0.5106382978723405,
"nlr": 0.48936120290005897,
"npv": 0.9749999989402174,
"plr": 510638.2870076961,
"precision": 0.999999958333335,
"pt": 0.0,
"recall": 0.5106382870076961,
"rnp": 0.9745762701540506,
"rocauc": 0.7553191489361702,
"specificity": 0.9999999988851728,
"tn": 897.0,
"tp": 24.0,
"ts": 0.5106382870076961
},
"macro": {
"accuracy": null,
"balanced_accuracy": null,
"bm": null,
"dor": null,
"f1": 0.8316990500573782,
"fdr": null,
"fm": null,
"fn": null,
"fnr": null,
"for": null,
"fp": null,
"fpr": null,
"lift": null,
"logloss": null,
"mcc": null,
"mk": null,
"modelgini": null,
"nlr": null,
"npv": null,
"plr": null,
"precision": 0.9874999786367762,
"pt": null,
"recall": 0.7553191429464344,
"rnp": null,
"rocauc": null,
"specificity": null,
"tn": null,
"tp": null,
"ts": null
},
"micro": {
"accuracy": null,
"balanced_accuracy": null,
"bm": null,
"dor": null,
"f1": 0.9756355921868267,
"fdr": null,
"fm": null,
"fn": null,
"fnr": null,
"for": null,
"fp": null,
"fpr": null,
"lift": null,
"logloss": null,
"mcc": null,
"mk": null,
"modelgini": null,
"nlr": null,
"npv": null,
"plr": null,
"precision": 0.9756355921868267,
"pt": null,
"recall": 0.9756355921868267,
"rnp": null,
"rocauc": null,
"specificity": null,
"tn": null,
"tp": null,
"ts": null
},
"weighted": {
"accuracy": null,
"balanced_accuracy": null,
"bm": null,
"dor": null,
"f1": 0.9718434487296412,
"fdr": null,
"fm": null,
"fn": null,
"fnr": null,
"for": null,
"fp": null,
"fpr": null,
"lift": null,
"logloss": null,
"mcc": null,
"mk": null,
"modelgini": null,
"nlr": null,
"npv": null,
"plr": null,
"precision": 0.9762446992741494,
"pt": null,
"recall": 0.9756355905865742,
"rnp": null,
"rocauc": null,
"specificity": null,
"tn": null,
"tp": null,
"ts": null
}
},
"class_of_interest": "False",
"classes": [
"False",
"True"
],
"compared_metrics": {
"False": {
"accuracy": 0.7794539236288591,
"balanced_accuracy": 0.6197176350803714,
"bm": 1.8333313655838992,
"dor": 1.950352421669011,
"f1": 0.4101828180912847,
"fdr": -31.721194901998416,
"fm": 0.4058583469752274,
"fn": null,
"fnr": null,
"for": null,
"fp": -47.82608695652174,
"fpr": -1.913045314782547,
"lift": 42.59322033898306,
"logloss": -1.7076767229153917,
"mcc": -25.428995985020826,
"mk": 0.8133603561512961,
"modelgini": 1.833333333333367,
"nlr": null,
"npv": -3.5256407374864913e-06,
"plr": 1.9503524216690094,
"precision": 0.8133639374339545,
"pt": -0.5677755606485893,
"recall": -3.3235081390717745e-08,
"rnp": -28.584730005468252,
"rocauc": 0.6197183098591662,
"specificity": 1.8333314266667327,
"tn": -45.83333333333333,
"tp": -22.965440356744704,
"ts": 0.8133639374339545
},
"True": {
"accuracy": 0.7794539236288591,
"balanced_accuracy": 0.6197176350803714,
"bm": 1.8333313655838992,
"dor": 3.819440272208927,
"f1": 1.2061391129963417,
"fdr": null,
"fm": 0.9125006311850866,
"fn": -47.82608695652174,
"fnr": -1.913045314782547,
"for": -31.721194901998416,
"fp": null,
"fpr": null,
"lift": -0.7401692504279528,
"logloss": -1.7076767229153917,
"mcc": -11.527877245046717,
"mk": 0.8133603561512961,
"modelgini": 1.8333333333333226,
"nlr": -1.913045282183293,
"npv": 0.8133639374339545,
"plr": 1.8333314266667333,
"precision": -3.5256407374864913e-06,
"pt": null,
"recall": 1.8333314266667327,
"rnp": 0.7456885742257191,
"rocauc": 0.6197183098591513,
"specificity": -3.3235081390717745e-08,
"tn": -22.965440356744704,
"tp": -45.83333333333333,
"ts": 1.8333314266667327
}
},
"count_y": 944.0,
"critical_level": -15.0,
"experiment_type": "classification",
"input_baseline_metrics": {
"experiment_type": "binary_classification",
"metrics": {
"False": {
"accuracy": 0.9756355921868267,
"balanced_accuracy": 0.7553191429464344,
"bm": 0.5106382858928689,
"dor": 2043474.1262746586,
"f1": 0.9873417716085074,
"fdr": 0.024999999972826085,
"fm": 0.9874208818195314,
"fn": 0.0,
"fnr": 0.0,
"for": 0.0,
"fp": 23.0,
"fpr": 0.4893616917157087,
"lift": 20.085106382978722,
"logloss": 0.3773188693195634,
"mcc": 3.4567222585378587,
"mk": 0.9749999572735524,
"modelgini": 0.5106382978723403,
"nlr": 0.0,
"npv": 0.999999958333335,
"plr": 2.0434741262746585,
"precision": 0.9749999989402174,
"pt": 0.41160660726983567,
"recall": 0.9999999988851728,
"rnp": 0.025423728786627405,
"rocauc": 0.7553191489361701,
"specificity": 0.5106382870076961,
"tn": 24.0,
"tp": 897.0,
"ts": 0.9749999989402174
},
"True": {
"accuracy": 0.9756355921868267,
"balanced_accuracy": 0.7553191429464344,
"bm": 0.5106382858928689,
"dor": 1043477.1708594387,
"f1": 0.676056328506249,
"fdr": 0.0,
"fm": 0.7145895785211968,
"fn": 23.0,
"fnr": 0.4893616917157087,
"for": 0.024999999972826085,
"fp": 0.0,
"fpr": 0.0,
"lift": 1.0260869565217392,
"logloss": 0.3773188693195634,
"mcc": 21.392205416433633,
"mk": 0.9749999572735524,
"modelgini": 0.5106382978723405,
"nlr": 0.48936120290005897,
"npv": 0.9749999989402174,
"plr": 510638.2870076961,
"precision": 0.999999958333335,
"pt": 0.0,
"recall": 0.5106382870076961,
"rnp": 0.9745762701540506,
"rocauc": 0.7553191489361702,
"specificity": 0.9999999988851728,
"tn": 897.0,
"tp": 24.0,
"ts": 0.5106382870076961
},
"classes": [
"False",
"True"
],
"count_y": 944.0,
"macro": {
"f1": 0.8316990500573782,
"precision": 0.9874999786367762,
"recall": 0.7553191429464344
},
"micro": {
"f1": 0.9756355921868267,
"precision": 0.9756355921868267,
"recall": 0.9756355921868267
},
"weighted": {
"f1": 0.9718434487296412,
"precision": 0.9762446992741494,
"recall": 0.9756355905865742
}
},
"segment": [
"0"
]
},
"input_target_metrics": {
"experiment_type": "binary_classification",
"metrics": {
"False": {
"accuracy": 0.9832402220904466,
"balanced_accuracy": 0.7599999888764114,
"bm": 0.5199999777528228,
"dor": 2083329.073382636,
"f1": 0.9913916779114836,
"fdr": 0.01706970125594637,
"fm": 0.9914284118881723,
"fn": 0.0,
"fnr": 0.0,
"for": 0.0,
"fp": 12.0,
"fpr": 0.4799999808000007,
"lift": 28.64,
"logloss": 0.37087548281702565,
"mcc": 2.5777124942009455,
"mk": 0.9829302203985075,
"modelgini": 0.52,
"nlr": 0.0,
"npv": 0.9999999230769291,
"plr": 2.083329073382636,
"precision": 0.9829302973215785,
"pt": 0.4092696055477427,
"recall": 0.999999998552822,
"rnp": 0.018156424555647453,
"rocauc": 0.76,
"specificity": 0.5199999792000009,
"tn": 13.0,
"tp": 691.0,
"ts": 0.9829302973215785
},
"True": {
"accuracy": 0.9832402220904466,
"balanced_accuracy": 0.7599999888764114,
"bm": 0.5199999777528228,
"dor": 1083332.1581545505,
"f1": 0.6842105083102499,
"fdr": 0.0,
"fm": 0.7211102129355855,
"fn": 12.0,
"fnr": 0.4799999808000007,
"for": 0.01706970125594637,
"fp": 0.0,
"fpr": 0.0,
"lift": 1.0184921763869133,
"logloss": 0.37087548281702565,
"mcc": 18.92613823601893,
"mk": 0.9829302203985075,
"modelgini": 0.52,
"nlr": 0.479999501495144,
"npv": 0.9829302973215785,
"plr": 519999.9792000009,
"precision": 0.9999999230769291,
"pt": 0.0,
"recall": 0.5199999792000009,
"rnp": 0.9818435740477045,
"rocauc": 0.76,
"specificity": 0.999999998552822,
"tn": 691.0,
"tp": 13.0,
"ts": 0.5199999792000009
},
"classes": [
"False",
"True"
],
"count_y": 716.0,
"macro": {
"f1": 0.8378010931108668,
"precision": 0.9914651101992538,
"recall": 0.7599999888764114
},
"micro": {
"f1": 0.9832402220904466,
"precision": 0.9832402220904466,
"recall": 0.9832402220904466
},
"weighted": {
"f1": 0.9806660770445884,
"precision": 0.9835263024338096,
"recall": 0.9832402199675415
}
},
"segment": [
"0"
]
},
"log_filename": "VianaiMetricsComparison.log",
"metric_of_interest": "accuracy",
"metric_of_interest_change": 0.7794539236288591,
"metric_of_interest_definition": "(TP + TN)/(TP + TN + FP + FN)",
"metric_of_interest_lower_is_better": false,
"metric_of_interest_name": "Accuracy",
"performance_drift_critical_flag": 0,
"performance_drift_warning_flag": 0,
"plot_class": "False",
"plot_metric": "accuracy",
"save_directory": "reports/performance",
"save_file": "./compare.json",
"segment_id": 0,
"target_metrics": {
"False": {
"accuracy": 0.9832402220904466,
"balanced_accuracy": 0.7599999888764114,
"bm": 0.5199999777528228,
"dor": 2083329.073382636,
"f1": 0.9913916779114836,
"fdr": 0.01706970125594637,
"fm": 0.9914284118881723,
"fn": 0.0,
"fnr": 0.0,
"for": 0.0,
"fp": 12.0,
"fpr": 0.4799999808000007,
"lift": 28.64,
"logloss": 0.37087548281702565,
"mcc": 2.5777124942009455,
"mk": 0.9829302203985075,
"modelgini": 0.52,
"nlr": 0.0,
"npv": 0.9999999230769291,
"plr": 2.083329073382636,
"precision": 0.9829302973215785,
"pt": 0.4092696055477427,
"recall": 0.999999998552822,
"rnp": 0.018156424555647453,
"rocauc": 0.76,
"specificity": 0.5199999792000009,
"tn": 13.0,
"tp": 691.0,
"ts": 0.9829302973215785
},
"True": {
"accuracy": 0.9832402220904466,
"balanced_accuracy": 0.7599999888764114,
"bm": 0.5199999777528228,
"dor": 1083332.1581545505,
"f1": 0.6842105083102499,
"fdr": 0.0,
"fm": 0.7211102129355855,
"fn": 12.0,
"fnr": 0.4799999808000007,
"for": 0.01706970125594637,
"fp": 0.0,
"fpr": 0.0,
"lift": 1.0184921763869133,
"logloss": 0.37087548281702565,
"mcc": 18.92613823601893,
"mk": 0.9829302203985075,
"modelgini": 0.52,
"nlr": 0.479999501495144,
"npv": 0.9829302973215785,
"plr": 519999.9792000009,
"precision": 0.9999999230769291,
"pt": 0.0,
"recall": 0.5199999792000009,
"rnp": 0.9818435740477045,
"rocauc": 0.76,
"specificity": 0.999999998552822,
"tn": 691.0,
"tp": 13.0,
"ts": 0.5199999792000009
},
"macro": {
"accuracy": null,
"balanced_accuracy": null,
"bm": null,
"dor": null,
"f1": 0.8378010931108668,
"fdr": null,
"fm": null,
"fn": null,
"fnr": null,
"for": null,
"fp": null,
"fpr": null,
"lift": null,
"logloss": null,
"mcc": null,
"mk": null,
"modelgini": null,
"nlr": null,
"npv": null,
"plr": null,
"precision": 0.9914651101992538,
"pt": null,
"recall": 0.7599999888764114,
"rnp": null,
"rocauc": null,
"specificity": null,
"tn": null,
"tp": null,
"ts": null
},
"micro": {
"accuracy": null,
"balanced_accuracy": null,
"bm": null,
"dor": null,
"f1": 0.9832402220904466,
"fdr": null,
"fm": null,
"fn": null,
"fnr": null,
"for": null,
"fp": null,
"fpr": null,
"lift": null,
"logloss": null,
"mcc": null,
"mk": null,
"modelgini": null,
"nlr": null,
"npv": null,
"plr": null,
"precision": 0.9832402220904466,
"pt": null,
"recall": 0.9832402220904466,
"rnp": null,
"rocauc": null,
"specificity": null,
"tn": null,
"tp": null,
"ts": null
},
"weighted": {
"accuracy": null,
"balanced_accuracy": null,
"bm": null,
"dor": null,
"f1": 0.9806660770445884,
"fdr": null,
"fm": null,
"fn": null,
"fnr": null,
"for": null,
"fp": null,
"fpr": null,
"lift": null,
"logloss": null,
"mcc": null,
"mk": null,
"modelgini": null,
"nlr": null,
"npv": null,
"plr": null,
"precision": 0.9835263024338096,
"pt": null,
"recall": 0.9832402199675415,
"rnp": null,
"rocauc": null,
"specificity": null,
"tn": null,
"tp": null,
"ts": null
}
},
"timestamp": "2023-07-11 14:08:33",
"warning_level": -5.0,
"baseline_window": {
"start_date": "2023-05-06 00:00:00",
"end_date": "2023-05-06 23:59:59"
},
"target_window": {
"start_date": "2023-05-07 00:00:00",
"end_date": "2023-05-07 23:59:59"
},
"prediction_volume": 716,
"ground_truth_volume": 716.0,
"ground_truth_percentage": 1.0
},
"runtime": 1689084513872.714,
"processed_ts": 1683417600000.0,
"predict_date": 1683417600000.0,
"index": 1000
}
],
"current_page": 1,
"page_size": 50,
"previous_page": null,
"next_page": null,
"has_previous": false,
"previous_items": 0,
"has_next": false,
"total": 1,
"pages": 1
}
To run all possible performance metrics on a model (specific to experiment type: regression or classification), use v1/model-performance (REST API) or vianops_client.api.jobmaker.modelperformance.ModelPerformanceV1Api (SDK).
For example, the following dict is returned for a classification model. This dict includes values calculated for additional averaging techniques.
{
"index": 273,
"deployment": "Credit Card Fraud1",
"model_name": "Credit Card Fraud1",
"model_version": "1",
"model_stage": "primary",
"hostname": "modelperformance-queue-988463-gnmjc",
"predict_date": 1681603200000.0,
"formatted_predict_date": "2023-04-16",
"metrics": {
"classes": [
"False",
"True"
],
"macro": {
"precision": 0.9911080410518374,
"recall": 0.7833333231925104,
"f1": 0.8572162673098757
},
"micro": {
"precision": 0.9826203195419514,
"recall": 0.9826203195419514,
"f1": 0.9826203195419514
},
"weighted": {
"precision": 0.982929393633817,
"recall": 0.9826203174474774,
"f1": 0.9802947082974853
},
"count_y": 748.0,
"False": {
"tn": 17.0,
"fp": 13.0,
"fn": 0.0,
"tp": 718.0,
"accuracy": 0.9826203195419514,
"balanced_accuracy": 0.7833333231925104,
"f1": 0.9910282946921819,
"recall": 0.9999999986072423,
"specificity": 0.5666666477777784,
"precision": 0.9822161409272009,
"npv": 0.999999941176474,
"fnr": 0.0,
"fpr": 0.4333333188888894,
"fdr": 0.017783857704810044,
"for": 0.0,
"plr": 2.3076870559694833,
"nlr": 0.0,
"pt": 0.396965501389143,
"ts": 0.9822161409272009,
"mcc": 3.076037269505317,
"fm": 0.9910681810850411,
"bm": 0.5666666463850207,
"mk": 0.9822160821036747,
"dor": 2307687.055969483,
"rnp": 0.02272727269688867,
"precision_recall_curve": {
"precision": [
0.9598930481283422,
0.0,
1.0
],
"recall": [
1.0,
0.0,
0.0
],
"thresholds": [
0.30000001192092896,
0.699999988079071
]
},
"roc_curve": {
"fpr": [
0.0,
0.5666666666666667,
1.0
],
"tpr": [
0.0,
0.0,
1.0
],
"thresholds": [
1.699999988079071,
0.699999988079071,
0.30000001192092896
]
},
"logloss": 0.371400724935304,
"calibration_curve": {
"prob_true": [
0.5,
0.0
],
"prob_pred": [
0.30000001192092896,
0.699999988079071
]
},
"rocauc": 0.7833333333333333,
"modelgini": 0.5666666666666667,
"lift": 24.933333333333334,
"lift_curve": {
"x": [
0.25,
0.5,
0.75,
1.0
],
"y": [
2.2666666666666666,
1.1333333333333333,
0.7555555555555555,
0.5666666666666667
]
}
},
"True": {
"tn": 718.0,
"fp": 0.0,
"fn": 13.0,
"tp": 17.0,
"accuracy": 0.9826203195419514,
"balanced_accuracy": 0.7833333231925104,
"f1": 0.7234042399275694,
"recall": 0.5666666477777784,
"specificity": 0.9999999986072423,
"precision": 0.999999941176474,
"npv": 0.9822161409272009,
"fnr": 0.4333333188888894,
"fpr": 0.0,
"fdr": 0.0,
"for": 0.017783857704810044,
"plr": 566666.6477777784,
"nlr": 0.43333288615953097,
"pt": 0.0,
"ts": 0.5666666477777784,
"mcc": 20.150832769421257,
"fm": 0.7527726180224996,
"bm": 0.5666666463850207,
"mk": 0.9822160821036747,
"dor": 1307690.595812674,
"rnp": 0.977272725966213,
"precision_recall_curve": {
"precision": [
0.9598930481283422,
0.9822161422708618,
1.0
],
"recall": [
1.0,
1.0,
0.0
],
"thresholds": [
0.30000001192092896,
0.699999988079071
]
},
"roc_curve": {
"fpr": [
0.0,
0.43333333333333335,
1.0
],
"tpr": [
0.0,
1.0,
1.0
],
"thresholds": [
1.699999988079071,
0.699999988079071,
0.30000001192092896
]
},
"logloss": 0.371400724935304,
"calibration_curve": {
"prob_true": [
0.0,
0.5
],
"prob_pred": [
0.30000001192092896,
0.699999988079071
]
},
"rocauc": 0.7833333333333333,
"modelgini": 0.5666666666666667,
"lift": 1.0232558139534884,
"lift_curve": {
"x": [
0.25,
0.5,
0.75,
1.0
],
"y": [
4.0,
2.0,
1.3333333333333333,
1.0
]
}
}
},
"num_inferences": 748,
"num_ground_truths": 748,
"ground_truth_percentage": 1.0,
"updated_ts": 1689084301744.427,
"status": "Success",
"segment_id": null,
"model_uuid": "d0b0036c-625a-42d0-84fd-6824d9cf53d3",
"training": null
}
Support for model comparison
Use the endpoint v1/performance/model-metrics to specify models, segments, and time windows for retrieving performance metrics. The endpoint can return metrics computed for one or many models. If the endpoint doesn’t return metrics for a model, then preprocessing wasn’t run for that model during the defined window.
The returned dict contains the performance metrics computed for all specified models and segments within configured timeframes.
If specifying two models for comparison, make sure the models are of the same experiment type (i.e., regression or classification), and have the same inputs and outputs. (Time windows can be different.) You can view and access results from the Model Dashboard, Model comparison page.
For example, the following request returns all performance metrics for two models (identified by model uuid) for the timeframe defined by the start and end times.
[
{
"model_uuid": "7a6feecb-6f9b-424a-a3fd-aeea00a8864a",
"segment_id": 4,
"start_date": "1685577600",
"end_date": "1688169599"
},
{
"model_uuid": "34922551-d60a-4695-bcf9-d184a5dd015e",
"segment_id": 8,
"start_date": "1685577600",
"end_date": "1688169599"
}
]
Property | Type | Description |
---|---|---|
model_uuid | string | Unique identifier of the model for the related metrics. Returned in V1ModelsModel from /v1/models. |
segment_id | integer | Unique identifier of the segment for aggregating metrics, or 0 or null to aggregate the data for all possible segments for the model. Returned in V1SegmentModel from /v1/segment. |
start_date | Timestamp (Unix epoch time in milliseconds) | The start date for the window from which to retrieve metrics for this model. |
end_date | Timestamp (Unix epoch time in milliseconds) | The end date for the window from which to retrieve metrics for this model. |