Strengthen alias normalization and ensure that normalized strings contain only ASCII characters. Closes #52

This commit is contained in:
Yohann D'ANELLO 2020-08-10 18:36:47 +02:00
parent c277d8bccd
commit 9c816a288d
1 changed file with 3 additions and 3 deletions

View File

@@ -244,12 +244,12 @@ class Alias(models.Model):
@staticmethod
def normalize(string):
    """
    Normalizes a string: casefolds it, expands the 'æ' and 'œ' ligatures,
    removes most diacritics, punctuation and whitespace, and discards any
    remaining non-ASCII characters.

    The returned value is a ``str`` made only of ASCII characters.
    """
    # Expand the ligatures by hand: NFKD has no decomposition for them, so
    # they would otherwise be discarded by the final ASCII filter.
    expanded = string.casefold().replace('æ', 'ae').replace('œ', 'oe')
    kept = ''.join(
        char for char in unicodedata.normalize('NFKD', expanded)
        # The first letter of a category is its major class: drop marks (M),
        # punctuation (P), separators (Z) and control/other characters (C).
        if unicodedata.category(char)[0] not in 'MPZC'
    )
    # NFKD can expand compatibility symbols into uppercase letters
    # (e.g. '™' becomes 'TM'), hence the second casefold.
    return kept.casefold().encode('ascii', 'ignore').decode('ascii')
def clean(self): def clean(self):
normalized_name = self.normalize(self.name) normalized_name = self.normalize(self.name)