Last active
January 23, 2018 15:44
-
-
Save ColinMaudry/5169cb4e285ca94a160272b7b59a5411 to your computer and use it in GitHub Desktop.
Conversion des données XML du BOAMP vers JSON-LD
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"@context": { | |
"@vocab": "https://data.maudry.com/rdf/boamp#", | |
"boamp": "https://data.maudry.com/voc/boamp#", | |
"dct": "http://purl.org/dc/terms/", | |
"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", | |
"rdfs": "http://www.w3.org/2000/01/rdf-schema#", | |
"xsd": "http://www.w3.org/2001/XMLSchema#", | |
"attributions": "https://boamp.maudry.com/attributions/", | |
"appeloffres": "https://boamp.maudry.com/appeloffres/", | |
"acheteurs": "https://boamp.maudry.com/acheteurs/", | |
"titulaires": "https://boamp.maudry.com/titulaires/", | |
"descripteurs": "https://boamp.maudry.com/descripteurs/", | |
"lots": "https://boamp.maudry.com/lots/", | |
"cpv": "https://boamp.maudry.com/cpv/", | |
"annonce": { | |
"@id": "boamp:annonce", | |
"@type": "rdfs:Class", | |
"rdfs:label": "Annonce" | |
}, | |
"appel-offre": { | |
"@id": "boamp:appel-offre", | |
"@type": "rdfs:Class", | |
"rdfs:label": "Appel d'offres", | |
"rdfs:subClassOf": "boamp:annonce" | |
}, | |
"attribution": { | |
"@id": "boamp:attribution", | |
"@type": "rdfs:Class", | |
"rdfs:label": "Attribution" | |
}, | |
"Acheteur": { | |
"@id": "boamp:Acheteur", | |
"@type": "rdfs:Class", | |
"rdfs:label": "Acheteur" | |
}, | |
"Cpv": { | |
"@id": "boamp:Cpv", | |
"@type": "rdfs:Class", | |
"rdfs:label": "Code CPV" | |
}, | |
"acheteur": { | |
"@id": "boamp:acheteur", | |
"@type": "@id", | |
"rdfs:label": "Acheteur" | |
}, | |
"Descripteur": { | |
"@id": "boamp:Descripteur", | |
"@type": "rdfs:Class", | |
"rdfs:label": "Descripteur" | |
}, | |
"Lot": { | |
"@id": "boamp:Lot", | |
"@type": "rdfs:Class", | |
"rdfs:label": "Lot" | |
}, | |
"etat": { | |
"@id": "boamp:etat", | |
"@type": "rdfs:Class", | |
"rdfs:label": "État" | |
}, | |
"region": { | |
"@id": "boamp:region", | |
"@type": "rdfs:Class", | |
"rdfs:label": "Région" | |
}, | |
"departement": { | |
"@id": "boamp:departement", | |
"@type": "rdfs:Class", | |
"rdfs:label": "Département" | |
}, | |
"commune": { | |
"@id": "boamp:commune", | |
"@type": "rdfs:Class", | |
"rdfs:label": "Commune" | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Converts a directory of BOAMP XML notices to JSON, sorts the JSON files into
# one sub-directory per notice nature, then converts the APPEL_OFFRE and
# ATTRIBUTION notices to JSON-LD with the jq filters found in the current directory.
#
# Usage: <this script> <directory containing *.xml>
#
# Requirements (all invoked below):
#   ./xml2json  = https://github.com/Cheedoong/xml2json
#   jq, and a grep supporting -P (Perl regular expressions)
#   filtre-appel-offres.jq and filtre-attribution.jq in the current directory

if [ -z "$1" ] || [ ! -d "$1" ]; then
    echo "Usage: $0 <directory containing BOAMP XML files>" >&2
    exit 1
fi

dir=$1

# Make a glob that matches nothing expand to nothing, so loops and counts
# see zero files instead of the literal pattern.
shopt -s nullglob

# Prints the number of arguments received; used as: count_files "$dir"/*.xml
count_files() {
    echo "$#"
}

# Converts every *.json file of directory $3 to JSON-LD with jq filter $2,
# then prints the success/failure summary. $1 is the label used in the banner.
convert_to_jsonld() {
    local label=$1 filter=$2 subdir=$3
    local file total=0 converted=0

    echo "Converting $label to JSON-LD..."
    for file in "$subdir"/*.json; do
        total=$((total + 1))
        if jq -f "$filter" "$file" > "$file.jsonld"; then
            converted=$((converted + 1))
        else
            # The redirection creates the output file even when jq fails:
            # remove it so a failed conversion leaves no empty .jsonld behind.
            rm -f -- "$file.jsonld"
        fi
    done
    echo "Success: $converted"
    echo "Failed: $((total - converted))"
}

count=$(count_files "$dir"/*.xml)

echo "$count XML files to process."
echo ""
echo "Converting XML files to JSON..."

for file in "$dir"/*.xml; do
    if ! ./xml2json "$file" > "$file.json"; then
        echo "xml2json failed: $file" >&2
        rm -f -- "$file.json"
    fi
done

echo "Categorizing files by nature..."

# -p: do not fail when the directories are left over from a previous run.
mkdir -p "$dir/ATTRIBUTION" "$dir/APPEL_OFFRE" "$dir/INTENTION_CONCLURE" "$dir/RECTIFICATIF"

for file in "$dir"/*.json; do
    # The nature is the first key of the "NATURE" object, e.g. {"NATURE":{"ATTRIBUTION":...
    nature=$(grep -oP '(?<="NATURE":{")[A-Z_]*(?=")' "$file" | head -n 1)
    if [ -n "$nature" ] && [ -d "$dir/$nature" ]; then
        mv -- "$file" "$dir/$nature/"
    else
        # Left in place; it is counted as failed below.
        echo "Could not categorize $file (nature: '${nature:-unknown}')" >&2
    fi
done

countAttri=$(count_files "$dir"/ATTRIBUTION/*.json)
countAppel=$(count_files "$dir"/APPEL_OFFRE/*.json)
countIntention=$(count_files "$dir"/INTENTION_CONCLURE/*.json)
countRecti=$(count_files "$dir"/RECTIFICATIF/*.json)

success=$(( countAttri + countAppel + countIntention + countRecti ))
failed=$(( count - success ))

echo ""
echo "Attributions: $countAttri"
echo "Appels d'offres: $countAppel"
echo "Intentions de conclure: $countIntention"
echo "Rectificatifs: $countRecti"
echo ""
echo "Success: $success"
echo "Failed: $failed"
echo ""

convert_to_jsonld "appel d'offres" filtre-appel-offres.jq "$dir/APPEL_OFFRE"
echo ""
convert_to_jsonld "attributions" filtre-attribution.jq "$dir/ATTRIBUTION"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# jq filter: converts one BOAMP "appel d'offres" notice (JSON produced from the
# BOAMP XML) into a JSON-LD document.
# Input : an object with a top-level ANNONCE key.
# Output: one JSON-LD object identified as "appeloffres:<IDWEB>".

# Local definition of walk: applies f bottom-up to every value of the input
# (children first, then the container itself).
def walk(f):
  . as $in
  | if type == "object" then
      reduce keys[] as $key
        ( {}; . + { ($key): ($in[$key] | walk(f)) } ) | f
    elif type == "array" then map( walk(f) ) | f
    else f
    end;

# Builds a boamp:Cpv node from one CPV element (input: object with PRINCIPAL).
def cpv_node:
  {
    "@id": ("cpv:" + .PRINCIPAL),
    "rdfs:label": .PRINCIPAL,
    "@type": "boamp:Cpv"
  };

# Maps a CPV value to one node (object input), a list of nodes (array input)
# or null (anything else). jq's type returns "object", never "objet".
def cpv_nodes:
  if type == "object" then cpv_node
  elif type == "array" then map(cpv_node)
  else null end;

# Builds a boamp:Lot node from one LOT element. $marche is the identifier
# used to namespace the lot number inside the "lots:" prefix.
def lot_node($marche):
  {
    "@id": ("lots:" + $marche + "_" + (.NUM | gsub("\\W";"_"))),
    "@type": "boamp:Lot",
    "rdfs:label": .INTITULE,
    "boamp:cpvPrincipal": (.CPV | cpv_nodes)
  };

.ANNONCE as $annonce |
$annonce.GESTION.REFERENCE as $reference |
$annonce.DONNEES as $donnees |
$donnees.OBJET as $objet |
$reference.IDWEB as $idweb |
# The nature is encoded as the (single) key of the NATURE object.
($reference.TYPE_AVIS.NATURE | keys[0] | ascii_downcase) as $type |
{
  "@context": "https://gist.githubusercontent.com/ColinMaudry/5169cb4e285ca94a160272b7b59a5411/raw/c05b569bc7c37297c1f91424d58b99bf325f7000/boamp-context.jsonld",
  "@type": ("boamp:" + $type),
  "@id": ("appeloffres:" + $idweb),
  "boamp:idweb": $idweb,
  "dct:identifier": $objet.REF_MARCHE,
  "boamp:famille": ($reference.TYPE_AVIS.FAMILLE | keys[0] | ascii_downcase),
  "boamp:statut": ($reference.TYPE_AVIS.STATUT | keys[0] | ascii_downcase),
  "boamp:nomHtml": $annonce.GESTION.NOM_HTML,
  "boamp:cpvPrincipal": ($objet.CPV | cpv_nodes),
  "boamp:debutDiffusion": {
    "@value": $annonce.GESTION.INDEXATION.DATE_PUBLICATION,
    "@type": "xsd:date"
  },
  "boamp:finDiffusion": {
    "@value": $annonce.GESTION.INDEXATION.DATE_FIN_DIFFUSION,
    "@type": "xsd:date"
  },
  # Every descriptor object is reduced to a reference built from its CODE.
  "boamp:descripteur": ($annonce.GESTION.INDEXATION.DESCRIPTEURS.DESCRIPTEUR |
    walk(if type == "object" then {"@id": ("descripteurs:" + .CODE)}
         elif type == "array" then map({"@id": (.["@id"])})
         else . end)),
  # A single lot (object) gets the same node shape as each lot of a list.
  # Lots are namespaced by the contract reference, falling back to the web id.
  "boamp:lot": (
    ($objet.REF_MARCHE // $idweb) as $marche |
    $objet.LOTS.LOT |
    if type == "array" then map(lot_node($marche))
    elif type == "object" then lot_node($marche)
    else null end
  ),
  # Not mapped yet:
  #"boamp:criteresSociauxEnv": ,
  #"boamp:departementPublication": "27",
  #"boamp:resumeObjet":"Pré du Bel Ebat - Accès pompiers SMAC : Plantations. Pré du Bel Ebat à Evreux"
  "boamp:typeProcedure": (
    if ($donnees.PROCEDURE.TYPE_PROCEDURE | type) == "object" then
      ($donnees.PROCEDURE.TYPE_PROCEDURE | keys[0] | ascii_downcase)
    else null end),
  "boamp:eligibleMps": (
    if ($donnees.CONDITION_PARTICIPATION.ELIGIBLE_MPS | type) == "object" then
      ($donnees.CONDITION_PARTICIPATION.ELIGIBLE_MPS | keys[0])
    else null end),
  "boamp:acheteur": {
    # The buyer IRI is derived from its name, non-word characters replaced by "-".
    "@id": ("acheteurs:" + ($donnees.IDENTITE.DENOMINATION | gsub("\\W";"-"))),
    "boamp:profilAcheteur": (
      if ($donnees.IDENTITE.URL_PROFIL_ACHETEUR | type) == "string" then
        {"@id": $donnees.IDENTITE.URL_PROFIL_ACHETEUR}
      else null end),
    "rdfs:label": $donnees.IDENTITE.DENOMINATION,
    "boamp:codePostal": $donnees.IDENTITE.CP,
    # The buyer type is the key of TYPE_POUVOIR_ADJUDICATEUR; when that key is
    # "AUTRE", the free-text value stored under it is used instead.
    "@type": (
      if ($donnees.TYPE_POUVOIR_ADJUDICATEUR | type) == "object" then
        ("boamp:" + ($donnees.TYPE_POUVOIR_ADJUDICATEUR | keys[0] |
          if . == "AUTRE" then
            $donnees.TYPE_POUVOIR_ADJUDICATEUR.AUTRE | ascii_downcase | gsub("\\W";"-")
          else
            . | ascii_downcase
          end))
      else
        "boamp:Acheteur"
      end)
  },
  "boamp:objetComplet": $objet.OBJET_COMPLET,
  "boamp:titreMarche": $objet.TITRE_MARCHE
  #"boamp:criteres":{},
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# jq filter: converts one BOAMP "attribution" (contract award) notice (JSON
# produced from the BOAMP XML) into a JSON-LD document.
# Input : an object with a top-level ANNONCE key.
# Output: one JSON-LD object identified as "<nature>s:<IDWEB>".

# Local definition of walk: applies f bottom-up to every value of the input
# (children first, then the container itself).
def walk(f):
  . as $in
  | if type == "object" then
      reduce keys[] as $key
        ( {}; . + { ($key): ($in[$key] | walk(f)) } ) | f
    elif type == "array" then map( walk(f) ) | f
    else f
    end;

# Maps an amount element (object with "@DEVISE" and "#text") to a
# currency/amount pair; any non-object input gives null.
# tonumber? suppresses the conversion error so that a missing or non-numeric
# amount becomes null instead of aborting the whole document.
def montant_node:
  if type == "object" then
    {
      "boamp:devise": .["@DEVISE"],
      "boamp:montant": ((.["#text"] | tonumber?) // null)
    }
  else null end;

.ANNONCE as $annonce |
$annonce.GESTION.REFERENCE as $reference |
$annonce.DONNEES as $donnees |
$donnees.ATTRIBUTION.DECISION as $decision |
$reference.IDWEB as $idweb |
# The nature is encoded as the (single) key of the NATURE object.
($reference.TYPE_AVIS.NATURE | keys[0] | ascii_downcase) as $type |
{
  "@context": "https://gist.githubusercontent.com/ColinMaudry/5169cb4e285ca94a160272b7b59a5411/raw/c05b569bc7c37297c1f91424d58b99bf325f7000/boamp-context.jsonld",
  "@type": ("boamp:" + $type),
  "@id": ($type + "s:" + $idweb),
  "boamp:idweb": $idweb,
  "dct:identifier": "",
  "boamp:famille": ($reference.TYPE_AVIS.FAMILLE | keys[0] | ascii_downcase),
  "boamp:statut": ($reference.TYPE_AVIS.STATUT | keys[0] | ascii_downcase),
  # Reference(s) to the earlier notice(s). jq's type returns "object" (not
  # "objet"), so a single earlier notice is handled by the second branch.
  # NOTE(review): the @id is the bare IDWEB, without the "appeloffres:" prefix
  # used for appel d'offres documents - confirm whether the prefix is wanted.
  "boamp:appelOffres": ($annonce.GESTION.MARCHE.ANNONCE_ANTERIEUR |
    if type == "array" then map({"@id": .REFERENCE.IDWEB})
    elif type == "object" then {"@id": .REFERENCE.IDWEB}
    else null end),
  "boamp:nomHtml": $annonce.GESTION.NOM_HTML,
  "boamp:debutDiffusion": {
    "@value": $annonce.GESTION.INDEXATION.DATE_PUBLICATION,
    "@type": "xsd:date"
  },
  "boamp:finDiffusion": {
    "@value": $annonce.GESTION.INDEXATION.DATE_FIN_DIFFUSION,
    "@type": "xsd:date"
  },
  # Every descriptor object is reduced to a reference built from its CODE.
  "boamp:descripteur": ($annonce.GESTION.INDEXATION.DESCRIPTEURS.DESCRIPTEUR |
    walk(if type == "object" then {"@id": ("descripteurs:" + .CODE)}
         elif type == "array" then map({"@id": (.["@id"])})
         else . end)),
  # Not mapped yet:
  #"boamp:criteresSociauxEnv": ,
  #"boamp:departementPublication": "27",
  #"boamp:resumeObjet":"Pré du Bel Ebat - Accès pompiers SMAC : Plantations. Pré du Bel Ebat à Evreux"
  "boamp:acheteur": (
    if $donnees.IDENTITE.DENOMINATION then
      {
        # The buyer IRI is derived from its name, non-word characters replaced by "-".
        "@id": ("acheteurs:" + ($donnees.IDENTITE.DENOMINATION | gsub("\\W";"-"))),
        "boamp:profilAcheteur": (
          if $donnees.IDENTITE.URL_PROFIL_ACHETEUR then
            {"@id": $donnees.IDENTITE.URL_PROFIL_ACHETEUR}
          else null end),
        "rdfs:label": $donnees.IDENTITE.DENOMINATION,
        "boamp:codePostal": $donnees.IDENTITE.CP,
        "@type": (
          if ($donnees.TYPE_ORGANISME | type) == "object" then
            ("boamp:" + ($donnees.TYPE_ORGANISME | keys[0] | ascii_downcase))
          else
            "boamp:Acheteur"
          end)
      }
    else null end),
  "boamp:objetComplet": $donnees.OBJET.OBJET_COMPLET,
  #"boamp:criteres":{},
  "boamp:dateDecisionAttribution": $donnees.ATTRIBUTION.DATE_DECISION,
  # Only a single decision with a single contractor is mapped; a list of
  # decisions (array) gives null.
  "boamp:titulaire": (
    if ($decision | type) == "object" and ($decision.TITULAIRE | type) == "object" then
      {
        "@id": ("titulaires:" + ($decision.TITULAIRE.DENOMINATION | gsub("\\W";"-")) + "-" + $decision.TITULAIRE.CP),
        "boamp:codePostal": $decision.TITULAIRE.CP
      }
    else null end),
  "boamp:valeurTotale": ($donnees.ATTRIBUTION.VALEUR_TOTALE | montant_node),
  # The type guard comes first: $decision may be an array, which cannot be
  # indexed with .RENSEIGNEMENT.
  "boamp:montantAttribue": (
    if ($decision | type) == "object" then
      ($decision.RENSEIGNEMENT.MONTANT | montant_node)
    else null end)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment