Apply Unicode normalization before stripping characters outside [A-Z0-9$]
This commit is contained in:
parent
8fa724e848
commit
3edc3ffa02
|
@ -31,7 +31,6 @@ def generate_side_identifier(title, authors, subtitle=None):
|
|||
title_normalized = re.sub(r'^THE ', '', title_normalized)
|
||||
title_normalized = re.sub(r'Œ', 'OE', title_normalized)
|
||||
title_normalized = title_normalized.replace(' ', '')
|
||||
title_normalized = re.sub("[^A-Z0-9$]", "", title_normalized)
|
||||
title_normalized = ''.join(
|
||||
char
|
||||
for char in unicodedata.normalize(
|
||||
|
@ -39,6 +38,7 @@ def generate_side_identifier(title, authors, subtitle=None):
|
|||
if all(not unicodedata.category(char).startswith(cat)
|
||||
for cat in {'M', 'P', 'Z', 'C'}) or char == ' '
|
||||
).casefold().upper()
|
||||
title_normalized = re.sub("[^A-Z0-9$]", "", title_normalized)
|
||||
authors = authors.copy()
|
||||
|
||||
def sort(author):
|
||||
|
|
Loading…
Reference in New Issue