diff --git a/.gitignore b/.gitignore
index ce6ab41..58ee091 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,8 @@
 *.aux
 *.log
+*.nav
 *.nt
+*.out
 *.pdf
+*.snm
+*.toc
diff --git a/Presentation.tex b/Presentation.tex
new file mode 100644
index 0000000..a92baee
--- /dev/null
+++ b/Presentation.tex
@@ -0,0 +1,162 @@
+% Beamer slides for the ``Keys in Graphs'' project presentation.
+\documentclass[aspectratio=169]{beamer}
+
+\usepackage[utf8]{inputenc}
+\usepackage[T1]{fontenc}
+\usepackage[english]{babel}
+% Loaded explicitly: several frames below draw tikzpicture diagrams.
+\usepackage{tikz}
+
+\usetheme{metropolis}
+
+\title{Keys in Graphs}
+\author{Yohann D'Anello}
+\date{January 31\textsuperscript{st}, 2022}
+
+\begin{document}
+
+\begin{frame}
+  \maketitle
+\end{frame}
+
+\begin{frame}{Project goal}
+  This project aims to find graph keys, as defined in \emph{Keys for Graphs}%
+  \footnote{\url{https://www.researchgate.net/publication/283189709_Keys_for_graphs}}.
+  A graph key describes the relations that an object can have with its keys, and
+  which relations these involved objects can have in turn.
+\end{frame}
+
+% Example graph key rooted at Book: boxed nodes are named, unboxed ones are the letters x, y, z, t.
+\begin{frame}{Graph key example}
+  \begin{center}
+    \begin{tikzpicture}[y=3cm]
+      \node[draw] (0) at (0, 0) {Book};
+      \node[] (00) at (-3, -1) {x};
+      \node[draw] (01) at (-1, -1) {Person};
+      \node[] (02) at (1, -1) {y};
+      \node[draw] (03) at (3, -1) {Company};
+      \node[draw] (010) at (-2, -2) {Country};
+      \node[] (011) at (0, -2) {z};
+      \node[] (030) at (3, -2) {t};
+      \draw[->] (0) -- (00) node[midway,above,sloped] {title};
+      \draw[->] (0) -- (01) node[midway,above,sloped] {author};
+      \draw[->] (0) -- (02) node[midway,above,sloped] {subtitle};
+      \draw[->] (0) -- (03) node[midway,above,sloped] {publisher};
+      \draw[->] (01) -- (010) node[midway,above,sloped] {nationality};
+      \draw[->] (01) -- (011) node[midway,above,sloped] {last name};
+      \draw[->] (03) -- (030) node[midway,above,sloped] {identifier};
+    \end{tikzpicture}
+  \end{center}
+\end{frame}
+
+\begin{frame}{How to generate keys?}
+  To generate these keys, we suggest finding $n$-almost keys using \emph{SAKey},
+  then exploring the involved relations that define a domain and a range, and
+  recursively exploring the related fields.
+
+  \begin{itemize}
+    \item Choose a dataset
+    \item Use \emph{SAKey} to find $n$-almost keys in the dataset
+    \item Find extracted relations that define a domain and a range
+    \item Recursively explore the graph keys of the discovered related items
+    \item Concatenate these graphs
+  \end{itemize}
+\end{frame}
+
+\begin{frame}{Implementation}
+  \begin{itemize}
+    \item Simple script using Python 3.9+
+    \item Libraries: \emph{beautifulsoup4}, \emph{lxml} (ontology parsing),
+      \emph{SPARQLWrapper} (for SPARQL queries)
+    \item Uses the given binary for SAKey
+    \item Source code: \url{https://gitlab.crans.org/ynerant/graph-keys}
+  \end{itemize}
+\end{frame}
+
+\begin{frame}{Algorithm}
+  \begin{itemize}
+    \item Input: class name $c$, threshold $n$, query limit $l$, recursion limit $r$
+    \item Query DBpedia to get the first $l$ rows of triples that describe the class $c$
+    \item Use \emph{SAKey} to find $n$-almost keys in the computed dataset of $l$ triples
+    \item Parse relations and keep those that define a range
+    \item Recurse on each discovered range, decrementing $r$, until $r$ is equal to $0$
+    \item Compute all given graphs
+    \item Display them
+  \end{itemize}
+\end{frame}
+
+\begin{frame}{Usage}
+  % The empty group in -{}- separates the two hyphens, presumably so they are not merged into one dash.
+  \texttt{./main.py Library 5 -{}-limit 3000 -{}-recursion 3}
+\end{frame}
+
+\begin{frame}{Difference with the given binary}
+  \begin{itemize}
+    \item Given material: linear
+    \item Given datasets: only concern one type of data, so it is impossible to explore further with these triples
+    \item $\implies$ Choice to query DBpedia to get more data
+  \end{itemize}
+\end{frame}
+
+\begin{frame}{Discovered ranges}
+  \begin{itemize}
+    \item Example: we discovered that \texttt{[inCemetery, hasSize]} is a key for
+      \emph{GraveMonument}
+    \item Relation \texttt{inCemetery} has domain \texttt{GraveMonument}
+      and range \texttt{Cemetery}
+    \item We can now query data about cemeteries to get larger keys
+  \end{itemize}
+
+  \begin{center}
+    \begin{tikzpicture}[y=3cm]
+      \node[draw] (0) at (0, 0) {GraveMonument};
+      \node[draw] (1) at (-2, -1) {Cemetery};
+      \node (2) at (2, -1) {x};
+
+      \draw[->] (0) -- (1) node[midway,above,sloped] {inCemetery};
+      \draw[->] (0) -- (2) node[midway,above,sloped] {hasSize};
+    \end{tikzpicture}
+  \end{center}
+\end{frame}
+
+\begin{frame}{Output example}
+  \begin{figure}
+    \centering
+    % Each \pause reveals one more hop of the Library -> Place -> City -> Image chain.
+    \begin{tikzpicture}[x=3cm]
+      \node[draw] (0) at (0, 0) {Library};
+      \pause
+      \node[draw] (0-0) at (1, 0) {Place};
+      \draw[->] (0) -- (0-0) node[midway,above,sloped] {location};
+      \pause
+      \node[draw] (0-0-0) at (2, 0) {City};
+      \draw[->] (0-0) -- (0-0-0) node[midway,above,sloped] {capital};
+      \pause
+      \node[draw] (0-0-0-0) at (3, 0) {Image};
+      \draw[->] (0-0-0) -- (0-0-0-0) node[midway,above,sloped] {thumbnail};
+    \end{tikzpicture}
+    \caption{Sample output of the program}
+  \end{figure}
+\end{frame}
+
+\begin{frame}{Limitations}
+  \begin{itemize}
+    \item Only a few keys are well-typed (with a domain and/or a range)
+    \item We only extend minimal $n$-almost keys
+    \item We never generate minimal graph keys $\implies$ some keys could never be generated
+    \item Lots of paths
+  \end{itemize}
+\end{frame}
+
+\begin{frame}{Future work}
+  \begin{itemize}
+    \item Generate graphs with leaves (easy)
+    \item Explore triples of the form \texttt{?x ?r ?y} where we specify
+      the class of \texttt{?y} instead of \texttt{?x}
+    \item Explore relations that do not define any range (hard)
+    \item Take into account the \texttt{sameAs} relation
+    \item Find a way to generate more complex graphs
+  \end{itemize}
+\end{frame}
+
+\end{document}