From 9c816a288deee435c9b24f4b1084d17c5688d104 Mon Sep 17 00:00:00 2001 From: Yohann D'ANELLO Date: Mon, 10 Aug 2020 18:36:47 +0200 Subject: [PATCH] Stronger alias normalisation, ensure that normalized strings are encoded in ASCII. Closes #52 --- apps/note/models/notes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/note/models/notes.py b/apps/note/models/notes.py index 9274dff6..eac5369f 100644 --- a/apps/note/models/notes.py +++ b/apps/note/models/notes.py @@ -244,12 +244,12 @@ class Alias(models.Model): @staticmethod def normalize(string): """ - Normalizes a string: removes most diacritics and does casefolding + Normalizes a string: removes most diacritics, does casefolding and ignores non-ASCII characters """ return ''.join( - char for char in unicodedata.normalize('NFKD', string.casefold()) + char for char in unicodedata.normalize('NFKD', string.casefold().replace('æ', 'ae').replace('œ', 'oe')) if all(not unicodedata.category(char).startswith(cat) - for cat in {'M', 'P', 'Z', 'C'})).casefold() + for cat in {'M', 'P', 'Z', 'C'})).casefold().encode('ascii', 'ignore').decode('ascii') def clean(self): normalized_name = self.normalize(self.name)