Skip to content

Instantly share code, notes, and snippets.

@shawngraham
Created January 6, 2025 13:51
Show Gist options
  • Save shawngraham/92c86d8c8da19e76517c173673a10831 to your computer and use it in GitHub Desktop.
Save shawngraham/92c86d8c8da19e76517c173673a10831 to your computer and use it in GitHub Desktop.
get images by motif from p-lod
%%capture
!python3 -m pip install git+https://github.com/p-lod/plodlib
!pip install requests_cache
!pip install rdflib
import plodlib
import json
import pandas as pd
from string import Template
import rdflib as rdf
import requests_cache
import requests
# Read-only SPARQL endpoint of the P-LOD triplestore.
_PLOD_QUERY_ENDPOINT = "http://52.170.134.25:3030/plod_endpoint/query"

# Seconds to wait for a single LUNA lookup before giving up on that image.
_LUNA_TIMEOUT_SECONDS = 30

# Attribute keys holding image URLs, most preferred first. Size 4 is the
# last resort: it is used only when no other size is available.
_LUNA_URL_PREFERENCE = ("urlSize2", "urlSize3", "urlSize1", "urlSize4")

# For collections without an 'image_description_english' attribute, the
# position inside 'fieldValues' that carries the description, keyed by the
# collection's tilde value.
_LUNA_DESCRIPTION_FIELD_INDEX = {"14": 2, "16": 1}

_SEARCH_QUERY = Template("""
PREFIX p-lod: <urn:p-lod:id:>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT ?concept ?concept_label ?img_urn ?l_record ?l_media ?l_batch ?l_description
WHERE {
  ?concept rdfs:label ?concept_label .
  FILTER regex(?concept_label, "$search_term", "i")
  { ?component p-lod:depicts ?concept .
    ?component p-lod:best-image ?img_urn .
  }
  UNION
  {
    ?concept p-lod:best-image ?img_urn .
  }
  ?img_urn p-lod:x-luna-record-id ?l_record .
  ?img_urn p-lod:x-luna-media-id ?l_media .
  ?img_urn p-lod:x-luna-batch-id ?l_batch .
  ?img_urn p-lod:x-luna-description ?l_description .
}
""")


def _escape_sparql_string(text):
    """Escape *text* so it can sit inside a double-quoted SPARQL literal."""
    # Backslash must be handled first so the escapes added below survive.
    return (
        str(text)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )


def _pick_luna_image_url(img_attributes):
    """Return the most preferred non-empty image URL, or None if there is none."""
    for key in _LUNA_URL_PREFERENCE:
        url = img_attributes.get(key)
        if url:
            return url
    return None


def _luna_description(media, img_attributes, tilde_val):
    """Return the English description of a LUNA media entry."""
    if 'image_description_english' in img_attributes:
        return img_attributes['image_description_english']
    field_index = _LUNA_DESCRIPTION_FIELD_INDEX.get(tilde_val)
    if field_index is None:
        return f"unrecognized collection {tilde_val}"
    try:
        return json.loads(media['fieldValues'])[field_index]['value']
    except (KeyError, IndexError, TypeError, ValueError):
        return "Trying to get description failed"


def _fetch_luna_info(record):
    """Look up one query result in LUNA and return (img_url, description).

    Both values are None when LUNA has no usable entry for the image
    (the triplestore may reference images LUNA does not hold) or when the
    lookup itself fails.
    """
    tilde_val = plodlib.luna_tilde_val(record['img_urn'])
    url = (
        'https://umassamherst.lunaimaging.com/luna/servlet/as/fetchMediaSearch'
        f'?mid=umass~{tilde_val}~{tilde_val}~{record["l_record"]}~{record["l_media"]}'
        '&fullData=true'
    )
    try:
        response = requests.get(url, timeout=_LUNA_TIMEOUT_SECONDS)
        luna_json = json.loads(response.text)
    except (requests.RequestException, ValueError):
        # One unreachable or malformed LUNA record must not abort the search.
        return None, None

    if not (isinstance(luna_json, list) and luna_json):
        return None, None

    media = luna_json[0]
    try:
        img_attributes = json.loads(media['attributes'])
    except (KeyError, TypeError, ValueError):
        return None, None
    if not isinstance(img_attributes, dict):
        return None, None

    return (
        _pick_luna_image_url(img_attributes),
        _luna_description(media, img_attributes, tilde_val),
    )


def search_images(search_term):
    """Search the P-LOD triplestore for images of concepts matching a term.

    The term is matched against concept labels as a case-insensitive
    regular expression. For every match, the image attached to the concept
    itself, or to a component that depicts it, is looked up in LUNA.

    Args:
        search_term (str): Regular expression to look for within labels.

    Returns:
        list: One dictionary per query result. Each holds the SPARQL
            variables (concept, concept_label, img_urn, l_record, l_media,
            l_batch, l_description) plus 'img_url' and
            'img_current_description', which are None when LUNA returned
            nothing usable. An empty list if nothing matches.
    """
    # Connect to the remote triplestore with a read-only connection.
    store = rdf.plugins.stores.sparqlstore.SPARQLStore(
        query_endpoint=_PLOD_QUERY_ENDPOINT,
        context_aware=False,
        returnFormat='json',
    )
    g = rdf.Graph(store)

    query = _SEARCH_QUERY.substitute(
        search_term=_escape_sparql_string(search_term)
    )
    results = g.query(query)
    results_df = pd.DataFrame(results, columns=results.json['head']['vars'])
    if results_df.empty:
        return []

    records = results_df.to_dict(orient='records')
    for record in records:
        img_url, img_description = _fetch_luna_info(record)
        record['img_url'] = img_url
        record['img_current_description'] = img_description
    return records
# Example usage: print every image found for concepts whose label matches
# the search term, or a notice when the search comes back empty.
search_term = "theseus"
image_results = search_images(search_term)
if not image_results:
    print(f"No images found for '{search_term}'")
else:
    for item in image_results:
        # One print call per record; sep="\n" keeps each field on its own line.
        print(
            f"Concept: {item['concept_label']}",
            f"Image URL: {item['img_url']}",
            f"Image Description: {item['img_current_description']}",
            "---",
            sep="\n",
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment