mirror of
https://gitlab.crans.org/mediatek/med.git
synced 2024-11-30 06:53:03 +00:00
Normalize before removing wrong letters
This commit is contained in:
parent
8fa724e848
commit
3edc3ffa02
@ -31,7 +31,6 @@ def generate_side_identifier(title, authors, subtitle=None):
|
|||||||
title_normalized = re.sub(r'^THE ', '', title_normalized)
|
title_normalized = re.sub(r'^THE ', '', title_normalized)
|
||||||
title_normalized = re.sub(r'Œ', 'OE', title_normalized)
|
title_normalized = re.sub(r'Œ', 'OE', title_normalized)
|
||||||
title_normalized = title_normalized.replace(' ', '')
|
title_normalized = title_normalized.replace(' ', '')
|
||||||
title_normalized = re.sub("[^A-Z0-9$]", "", title_normalized)
|
|
||||||
title_normalized = ''.join(
|
title_normalized = ''.join(
|
||||||
char
|
char
|
||||||
for char in unicodedata.normalize(
|
for char in unicodedata.normalize(
|
||||||
@ -39,6 +38,7 @@ def generate_side_identifier(title, authors, subtitle=None):
|
|||||||
if all(not unicodedata.category(char).startswith(cat)
|
if all(not unicodedata.category(char).startswith(cat)
|
||||||
for cat in {'M', 'P', 'Z', 'C'}) or char == ' '
|
for cat in {'M', 'P', 'Z', 'C'}) or char == ' '
|
||||||
).casefold().upper()
|
).casefold().upper()
|
||||||
|
title_normalized = re.sub("[^A-Z0-9$]", "", title_normalized)
|
||||||
authors = authors.copy()
|
authors = authors.copy()
|
||||||
|
|
||||||
def sort(author):
|
def sort(author):
|
||||||
|
Loading…
Reference in New Issue
Block a user