Don't invert false positive and true negative. Adapt threshold for better results
This commit is contained in:
parent
9f161adb2f
commit
f438ae6c79
|
@ -92,9 +92,11 @@ def compute_optimal_matrix_size(threshold: float) -> tuple[int, int]:
|
||||||
We want that this value is lower than the expected threshold to avoid
|
We want that this value is lower than the expected threshold to avoid
|
||||||
false negatives, but we want this value to stay near the expected
|
false negatives, but we want this value to stay near the expected
|
||||||
value since we also want to avoid false positives.
|
value since we also want to avoid false positives.
|
||||||
|
Since a check will be performed at the end on candidate pairs, we mainly
|
||||||
|
want to minimize false negatives rather than false positives.
|
||||||
|
|
||||||
Then, we ensure that the estimated threshold is between
|
Then, we ensure that the estimated threshold is between
|
||||||
2/3*threshold and threshold.
|
threshold/2 and 4*threshold/5.
|
||||||
|
|
||||||
To achieve that, we start from some values, then we add bands if the
|
To achieve that, we start from some values, then we add bands if the
|
||||||
threshold is too high, or add some rows per band if it is too low.
|
threshold is too high, or add some rows per band if it is too low.
|
||||||
|
@ -105,9 +107,9 @@ def compute_optimal_matrix_size(threshold: float) -> tuple[int, int]:
|
||||||
bands = 16
|
bands = 16
|
||||||
est_threshold = (1 / bands) ** (1 / rows)
|
est_threshold = (1 / bands) ** (1 / rows)
|
||||||
# Threshold is not acceptable
|
# Threshold is not acceptable
|
||||||
while not (2 * threshold / 3 < est_threshold < threshold):
|
while not (threshold / 2 < est_threshold < 4 * threshold / 5):
|
||||||
# Add bands
|
# Add bands
|
||||||
if est_threshold >= threshold:
|
if est_threshold >= 4 * threshold / 5:
|
||||||
bands *= 2
|
bands *= 2
|
||||||
# Add rows
|
# Add rows
|
||||||
else:
|
else:
|
||||||
|
@ -292,8 +294,8 @@ def main():
|
||||||
if ns.stats:
|
if ns.stats:
|
||||||
tp, fp, tn, fn = output
|
tp, fp, tn, fn = output
|
||||||
print(f"True positive: {tp}", file=sys.stderr)
|
print(f"True positive: {tp}", file=sys.stderr)
|
||||||
print(f"False positive: {tn}", file=sys.stderr)
|
print(f"False positive: {fp}", file=sys.stderr)
|
||||||
print(f"True negative: {fp}", file=sys.stderr)
|
print(f"True negative: {tn}", file=sys.stderr)
|
||||||
print(f"False negative: {fn}", file=sys.stderr)
|
print(f"False negative: {fn}", file=sys.stderr)
|
||||||
|
|
||||||
tp_rate = tp / (tp + fn)
|
tp_rate = tp / (tp + fn)
|
||||||
|
|
Loading…
Reference in New Issue