diff --git a/.gitignore b/.gitignore
index 27dfa4f..56e2b66 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
 __pycache__
 .env
 *.sqlite3
+*.sqlite3-journal
 data/
diff --git a/alembic/versions/62495c819f2b_ajout_des_modèles_de_géométries_.py b/alembic/versions/62495c819f2b_ajout_des_modèles_de_géométries_.py
new file mode 100644
index 0000000..d950f56
--- /dev/null
+++ b/alembic/versions/62495c819f2b_ajout_des_modèles_de_géométries_.py
@@ -0,0 +1,76 @@
+"""Ajout des modèles de géométries administratives
+
+Revision ID: 62495c819f2b
+Revises: 9d99f3ea6b66
+Create Date: 2024-06-08 16:13:15.457114
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = '62495c819f2b'
+down_revision: Union[str, None] = '9d99f3ea6b66'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('region',
+    sa.Column('code_insee', sa.String(length=3), nullable=False),
+    sa.Column('libelle', sa.String(length=64), nullable=False),
+    sa.Column('geometry', sa.JSON(), nullable=False),
+    sa.PrimaryKeyConstraint('code_insee'),
+    sa.UniqueConstraint('libelle')
+    )
+    op.create_table('departement',
+    sa.Column('code_insee', sa.String(length=2), nullable=False),
+    sa.Column('libelle', sa.String(length=64), nullable=False),
+    sa.Column('region_code', sa.String(length=3), nullable=False),
+    sa.Column('geometry', sa.JSON(), nullable=False),
+    sa.ForeignKeyConstraint(['region_code'], ['region.code_insee'], ),
+    sa.PrimaryKeyConstraint('code_insee'),
+    sa.UniqueConstraint('libelle')
+    )
+    op.create_table('circonscription',
+    sa.Column('id', sa.String(length=6), nullable=False),
+    sa.Column('departement_code', sa.String(length=2), nullable=False),
+    sa.Column('numero', sa.Integer(), nullable=False),
+    sa.ForeignKeyConstraint(['departement_code'], ['departement.code_insee'], ),
+    sa.PrimaryKeyConstraint('id')
+    )
+    op.create_table('commune',
+    sa.Column('code_insee', sa.String(length=5), nullable=False),
+    sa.Column('libelle', sa.String(length=64), nullable=False),
+    sa.Column('departement_code', sa.String(length=2), nullable=False),
+    sa.Column('geometry', sa.JSON(), nullable=False),
+    sa.ForeignKeyConstraint(['departement_code'], ['departement.code_insee'], ),
+    sa.PrimaryKeyConstraint('code_insee')
+    )
+    op.create_table('bureau_vote',
+    sa.Column('id', sa.String(length=16), nullable=False),
+    sa.Column('commune_code', sa.String(length=5), nullable=False),
+    sa.Column('code_bureau', sa.String(length=8), nullable=False),
+    sa.Column('circo_code', sa.String(length=6), nullable=False),
+    sa.Column('libelle', sa.String(length=256), nullable=False),
+    sa.Column('adresse', sa.String(length=256), nullable=True),
+    sa.Column('geometry', sa.JSON(), nullable=False),
+    sa.ForeignKeyConstraint(['circo_code'], ['circonscription.id'], ),
+    sa.ForeignKeyConstraint(['commune_code'], ['commune.code_insee'], ),
+    sa.PrimaryKeyConstraint('id')
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table('bureau_vote')
+    op.drop_table('commune')
+    op.drop_table('circonscription')
+    op.drop_table('departement')
+    op.drop_table('region')
+    # ### end Alembic commands ###
diff --git a/main.py b/main.py
index fc76a28..ac7af1f 100755
--- a/main.py
+++ b/main.py
@@ -6,7 +6,7 @@ import os
 
 from dotenv import load_dotenv
 from sqlalchemy import create_engine
-from nupes.scripts import import_candidats_2024
+from nupes.scripts import import_candidats_2024, import_geographie
 
 
 def parse_args():
@@ -15,7 +15,8 @@
                         choices=["présidentielle", "législatives", "sénatoriales", "européennes",
                                  "régionales", "départementales", "municipales"], default="européennes")
     parser.add_argument('--year', '-y', type=int, help="Année de l'élection", default=2024)
-    parser.add_argument('action', help="Action à réaliser", choices=["import_candidats"])
+    parser.add_argument('action', help="Action à réaliser",
+                        choices=["import_candidats", "import_geographie"])
     parser.add_argument('--debug', '-d', action='store_true', help="Mode debug")
     return parser.parse_args()
@@ -28,6 +29,9 @@ def main():
     engine = create_engine(os.getenv("SQLALCHEMY_URL"), echo=args.debug)
 
     match args.action:
+        case "import_geographie":
+            print(f"Import des données géographiques pour les élections {args.type} {args.year}")
+            import_geographie.run(engine)
         case "import_candidats":
             print(f"Import des candidats pour les élections {args.type} {args.year}")
             import_candidats_2024.run(engine)
diff --git a/nupes/data.py b/nupes/data.py
index 335fd76..ffbea27 100644
--- a/nupes/data.py
+++ b/nupes/data.py
@@ -6,15 +6,16 @@ import requests
 DATA_DIR = Path(__file__).parent / 'data'
 
 
-def get_file(url, filename):
+def get_file(url, filename, etag: str = None):
     if not DATA_DIR.is_dir():
         DATA_DIR.mkdir()
 
     head_response = requests.head(url, allow_redirects=True)
     headers = head_response.headers
-    etag = headers.get('ETag').split('/')[-1].replace('"', '')
+    if not etag:
+        etag = headers.get('ETag').split('/')[-1].replace('"', '')
 
-    file = DATA_DIR / f"{etag}_{filename}"
+    file = DATA_DIR / filename.format(etag=etag)
 
     if file.exists():
         return file
diff --git a/nupes/models/__init__.py b/nupes/models/__init__.py
index 246ed55..862fb36 100644
--- a/nupes/models/__init__.py
+++ b/nupes/models/__init__.py
@@ -1,2 +1,3 @@
 from .base import Base
+from .geographie import Region, Departement, Commune, Circonscription, BureauVote
 from .europeennes2024 import Bloc as Bloc2024, Nuance as Nuance2024, Liste as Liste2024, Candidat as Candidat2024
diff --git a/nupes/models/geographie.py b/nupes/models/geographie.py
new file mode 100644
index 0000000..9861db4
--- /dev/null
+++ b/nupes/models/geographie.py
@@ -0,0 +1,66 @@
+from typing import List
+
+from sqlalchemy import Float, ForeignKey, Integer, JSON, String
+from sqlalchemy.orm import mapped_column, Mapped, relationship
+
+from nupes.models import Base
+
+
+class Region(Base):
+    __tablename__ = "region"
+
+    code_insee: Mapped[str] = mapped_column(String(3), primary_key=True)
+    libelle: Mapped[str] = mapped_column(String(64), unique=True)
+    geometry: Mapped[dict] = mapped_column(JSON())
+
+    departements: Mapped[List["Departement"]] = relationship("Departement", back_populates="region")
+
+
+class Departement(Base):
+    __tablename__ = "departement"
+
+    code_insee: Mapped[str] = mapped_column(String(2), primary_key=True)
+    libelle: Mapped[str] = mapped_column(String(64), unique=True)
+    region_code: Mapped[str] = mapped_column(ForeignKey("region.code_insee"))
+    geometry: Mapped[dict] = mapped_column(JSON())
+
+    region: Mapped[Region] = relationship(Region, back_populates="departements")
+    communes: Mapped[List["Commune"]] = relationship("Commune", back_populates="departement")
+
+
+class Commune(Base):
+    __tablename__ = "commune"
+
+    code_insee: Mapped[str] = mapped_column(String(5), primary_key=True)
+    libelle: Mapped[str] = mapped_column(String(64))
+    departement_code: Mapped[str] = mapped_column(ForeignKey("departement.code_insee"))
+    geometry: Mapped[dict] = mapped_column(JSON())
+
+    departement: Mapped[Departement] = relationship(Departement, back_populates="communes")
+    bureaux_vote: Mapped[List["BureauVote"]] = relationship("BureauVote", back_populates="commune")
+
+
+class Circonscription(Base):
+    __tablename__ = "circonscription"
+
+    id: Mapped[str] = mapped_column(String(6), primary_key=True)
+    departement_code: Mapped[str] = mapped_column(ForeignKey("departement.code_insee"))
+    numero: Mapped[int] = mapped_column(Integer())
+
+    departement: Mapped[Departement] = relationship(Departement)
+    bureaux_vote: Mapped[List["BureauVote"]] = relationship("BureauVote", back_populates="circonscription")
+
+
+class BureauVote(Base):
+    __tablename__ = "bureau_vote"
+
+    id: Mapped[str] = mapped_column(String(16), primary_key=True)
+    commune_code: Mapped[str] = mapped_column(ForeignKey("commune.code_insee"))
+    code_bureau: Mapped[str] = mapped_column(String(8))
+    circo_code: Mapped[str] = mapped_column(ForeignKey("circonscription.id"))
+    libelle: Mapped[str] = mapped_column(String(256))
+    adresse: Mapped[str] = mapped_column(String(256), nullable=True)
+    geometry: Mapped[dict] = mapped_column(JSON())
+
+    commune: Mapped[Commune] = relationship(Commune, back_populates="bureaux_vote")
+    circonscription: Mapped[Circonscription] = relationship(Circonscription, back_populates="bureaux_vote")
diff --git a/nupes/scripts/import_candidats_2024.py b/nupes/scripts/import_candidats_2024.py
index 632f7ac..f32b9da 100644
--- a/nupes/scripts/import_candidats_2024.py
+++ b/nupes/scripts/import_candidats_2024.py
@@ -125,7 +125,7 @@ def importer_listes(engine: Engine) -> None:
 
 def importer_candidats(engine: Engine) -> None:
     DATASET_URL = "https://www.data.gouv.fr/fr/datasets/r/483cd4bd-1b0e-4b52-a923-aadadf9c8f62"
-    file = get_file(DATASET_URL, "candidats_2024.csv")
+    file = get_file(DATASET_URL, "candidats_2024-{etag}.csv")
 
     with file.open('r') as f:
         with Session(engine) as session:
diff --git a/nupes/scripts/import_geographie.py b/nupes/scripts/import_geographie.py
new file mode 100644
index 0000000..e502a01
--- /dev/null
+++ b/nupes/scripts/import_geographie.py
@@ -0,0 +1,177 @@
+import json
+from datetime import datetime
+
+import requests
+from sqlalchemy import Engine, select
+from sqlalchemy.orm import Session
+
+from nupes.data import get_file
+from nupes.models.geographie import BureauVote, Circonscription, Commune, Departement, Region
+
+
+def importer_regions(engine: Engine) -> None:
+    etag = requests.get(
+        "https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets"
+        "/georef-france-region?select=data_processed").json()['data_processed']
+    file = get_file("https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets"
+                    "/georef-france-region/exports/geojson?lang=fr&timezone=Europe%2FBerlin",
+                    "georef-france-region-{etag}.geojson", etag)
+
+    with file.open('r') as f:
+        features = json.load(f)['features']
+
+    with Session(engine) as session:
+        for feature in features:
+            region_dict = feature['properties']
+            code_region = region_dict['reg_code'][0]
+            nom_region = region_dict['reg_name'][0]
+
+            if region := session.execute(select(Region).filter_by(code_insee=code_region)).scalar_one_or_none():
+                region.libelle = nom_region
+                region.geometry = feature['geometry']
+            else:
+                region = Region(code_insee=code_region, libelle=nom_region, geometry=feature['geometry'])
+                session.add(region)
+
+        session.commit()
+
+
+def importer_departements(engine: Engine) -> None:
+    etag = requests.get(
+        "https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets"
+        "/georef-france-departement?select=data_processed").json()['data_processed']
+    file = get_file("https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets"
+                    "/georef-france-departement/exports/geojson?lang=fr&timezone=Europe%2FBerlin",
+                    "georef-france-departement-{etag}.geojson", etag)
+
+
+    with file.open('r') as f:
+        features = json.load(f)['features']
+
+    with Session(engine) as session:
+        for feature in features:
+            dpt_dict = feature['properties']
+            code_dpt = dpt_dict['dep_code'][0]
+            nom_dpt = dpt_dict['dep_name'][0]
+
+            if dpt := session.execute(select(Departement).filter_by(code_insee=code_dpt)).scalar_one_or_none():
+                dpt.libelle = nom_dpt
+                dpt.region_code = dpt_dict['reg_code'][0]
+                dpt.geometry = feature['geometry']
+            else:
+                dpt = Departement(code_insee=code_dpt, libelle=nom_dpt, region_code=dpt_dict['reg_code'][0],
+                                  geometry=feature['geometry'])
+                session.add(dpt)
+
+        session.commit()
+
+
+def importer_communes(engine: Engine) -> None:
+    etag = requests.get(
+        "https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets"
+        "/georef-france-commune?select=data_processed").json()['data_processed']
+    file = get_file("https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets"
+                    "/georef-france-commune/exports/geojson?lang=fr&timezone=Europe%2FBerlin",
+                    "georef-france-commune-{etag}.geojson", etag)
+
+    with file.open('r') as f:
+        features = json.load(f)['features']
+
+    with Session(engine) as session:
+        for feature in features:
+            commune_dict = feature['properties']
+            code_commune = commune_dict['com_code'][0]
+            nom_commune = commune_dict['com_name'][0]
+
+            if commune := session.execute(select(Commune).filter_by(code_insee=code_commune)).scalar_one_or_none():
+                commune.libelle = nom_commune
+                commune.departement_code = commune_dict['dep_code'][0]
+                commune.geometry = feature['geometry']
+            else:
+                commune = Commune(code_insee=code_commune, libelle=nom_commune,
+                                  departement_code=commune_dict['dep_code'][0], geometry=feature['geometry'])
+                session.add(commune)
+
+        session.commit()
+
+
+def importer_bureaux_vote(engine: Engine) -> None:
+    etag = requests.get(
+        "https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets"
+        "/elections-france-bureau-vote-2022?select=data_processed").json()['data_processed']
+    file = get_file("https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets"
+                    "/elections-france-bureau-vote-2022/exports/geojson?lang=fr&timezone=Europe%2FBerlin",
+                    "elections-france-bureau-vote-2022-{etag}.geojson", etag)
+
+    with file.open('r') as f:
+        features = json.load(f)['features']
+
+    with Session(engine) as session:
+        for feature in features:
+            bv_dict = feature['properties']
+            code_commune = bv_dict['com_code']
+            if not code_commune:
+                print(feature)
+                continue
+            code_commune = code_commune.split('/')[0]
+            code_bv = bv_dict['code'] or "0"
+            code_circo = bv_dict['circonscription_code']
+            bv_id = f"{code_commune}_{code_bv}"
+            bv_libelle = bv_dict['libelle'] or "Bureau unique"
+
+            if not session.execute(select(Circonscription).filter_by(id=code_circo)).scalar_one_or_none():
+                session.add(Circonscription(id=code_circo, departement_code=code_commune[:2],
+                                            numero=int(code_circo[3:])))
+
+            if bv := session.execute(select(BureauVote).filter_by(id=bv_id)).scalar_one_or_none():
+                bv.commune_code = code_commune
+                bv.code_bureau = code_bv
+                bv.circo_code = code_circo
+                bv.libelle = bv_libelle
+                bv.adresse = bv_dict['adresse']
+            else:
+                bv = BureauVote(id=bv_id, commune_code=code_commune, code_bureau=code_bv, circo_code=code_circo,
+                                libelle=bv_libelle, adresse=bv_dict['adresse'],
+                                geometry={})
+                session.add(bv)
+
+        session.commit()
+
+
+def importer_contours_bureaux_vote(engine: Engine) -> None:
+    file = get_file("https://www.data.gouv.fr/fr/datasets/r/f98165a7-7c37-4705-a181-bcfc943edc73",
+                    "contours-bureaux-vote.geojson")
+
+    with file.open('r') as f:
+        features = json.load(f)['features']
+
+    with Session(engine) as session:
+        for feature in features:
+            bv_id: str = feature['properties']['id_bv']
+            com_code, bv_code = bv_id.split('_')
+            bv_code = bv_code.replace("-", " ").replace(".", " ").strip()
+            while len(bv_code) >= 2 and bv_code[0] == '0':
+                bv_code = bv_code[1:]
+            while "  " in bv_code:
+                bv_code = bv_code.replace("  ", " ")
+            bv_id = f"{com_code}_{bv_code}"
+
+            if bv := session.execute(select(BureauVote).filter_by(id=bv_id)).scalar_one_or_none():
+                bv.geometry = feature['geometry']
+            else:
+                results = session.execute(select(BureauVote).filter_by(commune_code=com_code)).scalars().all()
+                if len(results) == 1:
+                    bv = results[0]
+                    bv.geometry = feature['geometry']
+                else:
+                    print(f"Bureau de vote {bv_id} non trouvé")
+
+        session.commit()
+
+
+def run(engine: Engine) -> None:
+    # importer_regions(engine)
+    # importer_departements(engine)
+    # importer_communes(engine)
+    importer_bureaux_vote(engine)
+    importer_contours_bureaux_vote(engine)
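
A minimal sketch of how the new import can be driven outside the CLI, mirroring what main.py now does. SQLALCHEMY_URL, load_dotenv and import_geographie.run() all come from this diff; everything else here is illustrative. Note that, as committed, run() only executes the two bureau-de-vote steps, since the region, departement and commune importers are commented out.

# Illustrative only: drive the new geography import without going through main.py.
import os

from dotenv import load_dotenv
from sqlalchemy import create_engine

from nupes.scripts import import_geographie

load_dotenv()
engine = create_engine(os.getenv("SQLALCHEMY_URL"), echo=False)
# Apply the new migration first (e.g. `alembic upgrade head`) so the
# region/departement/commune/circonscription/bureau_vote tables exist.
import_geographie.run(engine)  # currently runs importer_bureaux_vote and importer_contours_bureaux_vote only

The same thing is reachable from the CLI as the new `import_geographie` action added to main.py's argument parser.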
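For reference, a small sketch of what the reworked get_file() caching in nupes/data.py does with its filename template (the timestamp value below is made up): the data_processed value fetched from opendatasoft, or the HTTP ETag as a fallback, is substituted into the name, so a new upstream version produces a new cached file, while a name without an {etag} placeholder, such as contours-bureaux-vote.geojson, is reused as-is.

# Illustrative: how the filename templates passed to get_file() resolve.
template = "georef-france-region-{etag}.geojson"
print(template.format(etag="2024-06-08"))  # -> georef-france-region-2024-06-08.geojson

# No placeholder: str.format() ignores the unused keyword, so the cached name never changes.
print("contours-bureaux-vote.geojson".format(etag="2024-06-08"))  # -> contours-bureaux-vote.geojson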