Skip to content

Instantly share code, notes, and snippets.

@dridk
Created June 28, 2025 12:15
Show Gist options
  • Select an option

  • Save dridk/3be7d1994b7e11b95579d70c3a223c61 to your computer and use it in GitHub Desktop.

Select an option

Save dridk/3be7d1994b7e11b95579d70c3a223c61 to your computer and use it in GitHub Desktop.
Extraction des codes CIM10 depuis un fichier RDF vers un fichier Parquet
import rdflib
import polars
# Export the CIM10 terminology stored in "cim10.rdf" to "cim10.parquet",
# producing one row per concept.

# Load the terminology into an in-memory RDF graph.
g = rdflib.Graph()
g.parse("cim10.rdf")

# SPARQL query retrieving the variables of interest: for every concept under
# atih:cim10 its code, label and type, the notation of each of its ancestors
# (?path) and, when present, its synonyms and inclusion/exclusion notes.
# NOTE: the skos prefix is declared twice in the query text; the text is kept
# as-is on purpose.
query = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX xkos: <http://rdf-vocabulary.ddialliance.org/xkos#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX atih: <http://data.esante.gouv.fr/atih/>
SELECT ?concept ?code ?label ?path ?type ?synonymes ?inclusion_note ?exclusion_note
WHERE {
?concept rdfs:subClassOf* atih:cim10 .
?concept rdfs:label ?label.
?concept skos:notation ?code.
?concept rdfs:subClassOf+ ?superClass.
?superClass skos:notation ?path.
?concept dc:type ?type.
OPTIONAL { ?concept skos:altLabel ?synonymes. }
OPTIONAL { ?concept atih:inclusionNote ?inclusion_note . }
OPTIONAL { ?concept atih:exclusionNote ?exclusion_note . }
}
"""

# Run the SPARQL query. The query text lives in `query`; the name `sparql`
# used previously was never defined.
records = g.query(query)

# Names of the SELECT variables, in projection order.
columns = [str(i) for i in records.vars]

# Keep only tuple-like result rows; anything else is unexpected for a SELECT.
recs = []
for rec in records:
    if not isinstance(rec, tuple):
        raise TypeError("Records must contains iterable ")
    recs.append(rec)

# Build the polars DataFrame.
# Every column is declared as a string column up front so that an empty result
# set or an all-null optional column still yields the expected schema.
schema = {name.upper(): polars.Utf8 for name in columns}
rows = [
    {
        # Unbound OPTIONAL variables are None: keep them as real nulls instead
        # of the literal string "None", otherwise drop_nulls() below is a no-op.
        name: None if value is None else str(value)
        for name, value in zip(schema, rec)
    }
    for rec in recs
]
df = polars.DataFrame(rows, schema=schema)

# Collapse the result to one row per concept. The query returns one row per
# (ancestor, synonym, note) combination, so the list-valued columns are
# de-duplicated while keeping first-seen order.
df = df.group_by("CONCEPT", maintain_order=True).agg(
    polars.col("CODE").first(),
    # Same code with its first "." removed (raw string: "\." is a regex escape).
    polars.col("CODE").first().str.replace(r"\.", "").alias("CODE_2"),
    polars.col("LABEL").first(),
    # Reversed as in the original; presumably the query yields ancestors
    # nearest-first so this gives root-first order -- TODO confirm.
    polars.col("PATH").unique(maintain_order=True).reverse(),
    polars.col("SYNONYMES").drop_nulls().unique(maintain_order=True),
    polars.col("TYPE").first(),
    polars.col("INCLUSION_NOTE").first(),
    polars.col("EXCLUSION_NOTE").first(),
)

df.write_parquet("cim10.parquet")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment