mirror of https://gitlab.crans.org/bde/nk20
Stronger alias normalisation, ensure that normalized strings are encoded in ASCII. Closes #52
This commit is contained in:
parent
c277d8bccd
commit
9c816a288d
|
@ -244,12 +244,12 @@ class Alias(models.Model):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def normalize(string):
|
def normalize(string):
|
||||||
"""
|
"""
|
||||||
Normalizes a string: removes most diacritics and does casefolding
|
Normalizes a string: removes most diacritics, does casefolding and drops any remaining non-ASCII characters
|
||||||
"""
|
"""
|
||||||
return ''.join(
|
return ''.join(
|
||||||
char for char in unicodedata.normalize('NFKD', string.casefold())
|
char for char in unicodedata.normalize('NFKD', string.casefold().replace('æ', 'ae').replace('œ', 'oe'))
|
||||||
if all(not unicodedata.category(char).startswith(cat)
|
if all(not unicodedata.category(char).startswith(cat)
|
||||||
for cat in {'M', 'P', 'Z', 'C'})).casefold()
|
for cat in {'M', 'P', 'Z', 'C'})).casefold().encode('ascii', 'ignore').decode('ascii')
|
||||||
|
|
||||||
def clean(self):
|
def clean(self):
|
||||||
normalized_name = self.normalize(self.name)
|
normalized_name = self.normalize(self.name)
|
||||||
|
|
Loading…
Reference in New Issue