Stronger alias normalization: ensure that normalized strings contain only ASCII characters. Closes #52

This commit is contained in:
Yohann D'ANELLO 2020-08-10 18:36:47 +02:00
parent c277d8bccd
commit 9c816a288d
1 changed file with 3 additions and 3 deletions

View File

@ -244,12 +244,12 @@ class Alias(models.Model):
@staticmethod
def normalize(string):
"""
Normalizes a string: removes most diacritics and does casefolding
Normalizes a string: removes most diacritics, does casefolding and ignores non-ASCII characters
"""
return ''.join(
char for char in unicodedata.normalize('NFKD', string.casefold())
char for char in unicodedata.normalize('NFKD', string.casefold().replace('æ', 'ae').replace('œ', 'oe'))
if all(not unicodedata.category(char).startswith(cat)
for cat in {'M', 'P', 'Z', 'C'})).casefold()
for cat in {'M', 'P', 'Z', 'C'})).casefold().encode('ascii', 'ignore').decode('ascii')
def clean(self):
normalized_name = self.normalize(self.name)