1
0
mirror of https://gitlab.crans.org/mediatek/med.git synced 2024-11-27 00:07:12 +00:00

Normalize after replacing L'

This commit is contained in:
Yohann D'ANELLO 2020-10-02 13:54:16 +02:00
parent 6985e39130
commit 8b097dc4e0

View File

@ -21,13 +21,6 @@ def generate_side_identifier(title, authors, subtitle=None):
title_normalized = title.upper() title_normalized = title.upper()
title_normalized = title_normalized.replace('’', '\'') title_normalized = title_normalized.replace('’', '\'')
title_normalized = ''.join(
char
for char in unicodedata.normalize(
'NFKD', title_normalized.casefold())
if all(not unicodedata.category(char).startswith(cat)
for cat in {'M', 'P', 'Z', 'C'}) or char == ' '
).casefold().upper()
title_normalized = re.sub(r'^DE ', '', title_normalized) title_normalized = re.sub(r'^DE ', '', title_normalized)
title_normalized = re.sub(r'^LE ', '', title_normalized) title_normalized = re.sub(r'^LE ', '', title_normalized)
title_normalized = re.sub(r'^LA ', '', title_normalized) title_normalized = re.sub(r'^LA ', '', title_normalized)
@ -37,6 +30,13 @@ def generate_side_identifier(title, authors, subtitle=None):
title_normalized = re.sub(r'Œ', 'OE', title_normalized) title_normalized = re.sub(r'Œ', 'OE', title_normalized)
title_normalized = title_normalized.replace(' ', '') title_normalized = title_normalized.replace(' ', '')
title_normalized = re.sub("[^A-Z0-9$]", "", title_normalized) title_normalized = re.sub("[^A-Z0-9$]", "", title_normalized)
title_normalized = ''.join(
char
for char in unicodedata.normalize(
'NFKD', title_normalized.casefold())
if all(not unicodedata.category(char).startswith(cat)
for cat in {'M', 'P', 'Z', 'C'}) or char == ' '
).casefold().upper()
authors = authors.copy() authors = authors.copy()
def sort(author): def sort(author):