Created
January 6, 2025 13:51
-
-
Save shawngraham/92c86d8c8da19e76517c173673a10831 to your computer and use it in GitHub Desktop.
get images by motif from p-lod
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%%capture | |
!python3 -m pip install git+https://github.com/p-lod/plodlib | |
!pip install requests_cache | |
!pip install rdflib | |
import plodlib | |
import json | |
import pandas as pd | |
from string import Template | |
import rdflib as rdf | |
import requests_cache | |
import requests | |
def _escape_sparql_literal(text):
    """Escape *text* so it can sit inside a double-quoted SPARQL string literal.

    Backslashes, double quotes and line-breaking characters are replaced by
    their SPARQL escape sequences, so the value cannot terminate the literal
    early or otherwise change the structure of the surrounding query.
    """
    return str(text).translate(
        str.maketrans(
            {
                "\\": "\\\\",
                '"': '\\"',
                "\n": "\\n",
                "\r": "\\r",
                "\t": "\\t",
            }
        )
    )


def _pick_image_url(img_attributes):
    """Return the best available LUNA image URL in *img_attributes*, or None.

    Size 2 is preferred, then size 3, then size 1; size 4 is used only when
    none of the others is present.
    """
    for size_key in ("urlSize2", "urlSize3", "urlSize1", "urlSize4"):
        if size_key in img_attributes:
            return img_attributes[size_key]
    return None


def _describe_image(luna_record, img_attributes, tilde_val):
    """Return a description string for one LUNA media record.

    Uses ``image_description_english`` from the attributes when present.
    Otherwise falls back to a position in the record's ``fieldValues`` list
    that depends on the collection number *tilde_val*.
    """
    if "image_description_english" in img_attributes:
        return img_attributes["image_description_english"]

    # Index of the description entry inside fieldValues, per collection.
    field_index = {"14": 2, "16": 1}.get(tilde_val)
    if field_index is None:
        return f"unrecognized collection {tilde_val}"

    try:
        return json.loads(luna_record["fieldValues"])[field_index]["value"]
    except (KeyError, IndexError, TypeError, ValueError):
        # Missing or malformed fieldValues: keep going with a placeholder.
        return "Trying to get description failed"


def _add_luna_info(row, session, timeout=30):
    """Add ``img_url`` and ``img_current_description`` to one result row.

    Fetches the media record for the row from the LUNA ``fetchMediaSearch``
    servlet using *session*. Both new fields stay ``None`` when LUNA returns
    an empty result (probably meaning LUNA has no image even though the
    triplestore lists one).
    """
    img_src = None
    img_description = None

    tilde_val = plodlib.luna_tilde_val(row["img_urn"])
    luna_url = (
        "https://umassamherst.lunaimaging.com/luna/servlet/as/fetchMediaSearch"
        f"?mid=umass~{tilde_val}~{tilde_val}~{row['l_record']}~{row['l_media']}"
        "&fullData=true"
    )
    # A timeout keeps one unresponsive request from hanging the whole search.
    luna_json = json.loads(session.get(luna_url, timeout=timeout).text)

    if luna_json:
        luna_record = luna_json[0]
        img_attributes = json.loads(luna_record["attributes"])
        img_description = _describe_image(luna_record, img_attributes, tilde_val)
        img_src = _pick_image_url(img_attributes)

    row["img_url"] = img_src
    row["img_current_description"] = img_description
    return row


def search_images(search_term):
    """Search the P-LOD triplestore for images of concepts matching a term.

    Concepts are matched by applying *search_term* as a case-insensitive
    regular expression to their ``rdfs:label``. For every match, the images
    attached either to the concept itself or to a component that depicts it
    are looked up in LUNA to obtain a URL and a description.

    Args:
        search_term (str): The term (regular expression) to look for within
            concept labels. It is escaped before being placed in the query,
            so quotes and backslashes cannot alter the query itself.

    Returns:
        list: One dictionary per result row, holding the columns reported by
        the SPARQL result plus ``img_url`` and ``img_current_description``
        (either may be ``None``). Returns an empty list if nothing matches.
    """
    # Import the submodule explicitly instead of relying on some other
    # package having already loaded it as a side effect.
    from rdflib.plugins.stores.sparqlstore import SPARQLStore

    # Connect to the remote triplestore with a read-only (query) endpoint.
    store = SPARQLStore(
        query_endpoint="http://52.170.134.25:3030/plod_endpoint/query",
        context_aware=False,
        returnFormat="json",
    )
    g = rdf.Graph(store)

    qt = Template("""
        PREFIX p-lod: <urn:p-lod:id:>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        SELECT DISTINCT ?concept ?concept_label ?img_urn ?l_record ?l_media ?l_batch ?l_description
        WHERE {
            ?concept rdfs:label ?concept_label .
            FILTER regex(?concept_label, "$search_term", "i")
            { ?component p-lod:depicts ?concept .
              ?component p-lod:best-image ?img_urn .
            }
            UNION
            {
              ?concept p-lod:best-image ?img_urn .
            }
            ?img_urn p-lod:x-luna-record-id ?l_record .
            ?img_urn p-lod:x-luna-media-id ?l_media .
            ?img_urn p-lod:x-luna-batch-id ?l_batch .
            ?img_urn p-lod:x-luna-description ?l_description .
        }
    """)

    query = qt.substitute(search_term=_escape_sparql_literal(search_term))
    results = g.query(query)
    results_df = pd.DataFrame(results, columns=results.json["head"]["vars"])
    if results_df.empty:
        return []

    # One session for all rows so the connection to LUNA is reused.
    with requests.Session() as session:
        results_df = results_df.apply(_add_luna_info, axis=1, args=(session,))

    return results_df.to_dict(orient="records")
# Example usage: run one search and print each image that was found.
search_term = "theseus"
image_results = search_images(search_term)

if not image_results:
    # An empty list means no label matched the term.
    print(f"No images found for '{search_term}'")
else:
    for item in image_results:
        # One field per line, followed by a separator line.
        print(
            f"Concept: {item['concept_label']}",
            f"Image URL: {item['img_url']}",
            f"Image Description: {item['img_current_description']}",
            "---",
            sep="\n",
        )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment