Created
March 1, 2015 22:40
-
-
Save pebbie/a3f25fa74d293d8073e8 to your computer and use it in GitHub Desktop.
Treat a NumPy array as a graph in RDFLib
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
rastore - virtual RDF data store for rasters

A graph interface that exposes a NumPy array as an RDF Data Cube:
- shape (dimensions)
- element datatype
- elements as an RDF list?
- slices? as structured naming of a bnode, or as a slice object?

Alternatively, a list of pixels with position as dimension and value as measure,
e.g.
    :e_0_0 a qb:Observation ;
        :dim_0 0 ;
        :dim_1 0 ;
        :value 255 .

The array can come from OpenCV or GDAL.
""" | |
from rdflib.store import Store | |
from rdflib import Graph, XSD, RDF, RDFS, OWL, URIRef, Namespace, BNode, Literal | |
import urllib.parse as urlparse | |
import numpy as np | |
def add_list(graph, triples):
    """Add *triples* to *graph* and link their subjects into an RDF list.

    Every triple is added to the graph unchanged, and its subject
    (``item[0]``) becomes one ``rdf:first`` member of a newly built
    ``rdf:first`` / ``rdf:rest`` chain that ends in ``rdf:nil``.

    Args:
        graph: object exposing ``add(triple)`` (e.g. an rdflib ``Graph``).
        triples: iterable of ``(subject, predicate, object)`` tuples.

    Returns:
        The head node of the list: a new blank node, or ``rdf:nil`` when
        *triples* is empty (the RDF representation of an empty list).
    """
    # Materialize once so generators are accepted and the last item is known.
    items = list(triples)
    if not items:
        return RDF.nil

    head = BNode()
    node = head
    last = len(items) - 1
    for position, item in enumerate(items):
        graph.add(item)
        graph.add((node, RDF.first, item[0]))
        # The final cell points at rdf:nil; every other cell at a new bnode.
        following = RDF.nil if position == last else BNode()
        graph.add((node, RDF.rest, following))
        node = following
    return head
# RDF Data Cube vocabulary (qb:DataSet, qb:component, qb:dimension, ...).
QB = Namespace('http://purl.org/linked-data/cube#')
# Author's array vocabulary; not referenced by the code visible in this file.
ARR = Namespace('http://pebbie.org/ns/array#')
# Document-relative namespace: THIS.value is <#value>, THIS['e_0_1'] is <#e_0_1>.
THIS = Namespace('#')
class RasterStore(Store):
    """Virtual RDF store that exposes a NumPy array as an RDF Data Cube.

    Structural metadata (dataset, dimensions, measure) is materialized once
    into an internal graph.  The per-element triples are never stored: they
    are derived from the array on demand in :meth:`triples`.  Each element at
    index ``(i, j, ...)`` is identified by the URI ``<#e_i_j_...>`` and
    carries ``qb:dataset``, ``<#value>`` and one ``<#dim_k>`` triple per axis.
    """

    # Marker that separates the document part of an element URI from its index.
    _ELEMENT_MARKER = '#e_'

    def __init__(self, array, configuration=None):
        """Wrap *array* (kept by reference, so ``add`` mutates it in place).

        Args:
            array: NumPy array to expose; its ``shape``, ``dtype``, ``size``
                and indexing are used.
            configuration: forwarded unchanged to ``Store.__init__``.
        """
        super(RasterStore, self).__init__(configuration)
        self.arr = array
        self.__namespace = {}
        self.__prefix = {}
        self._g = Graph()
        self.read_array_meta()

    def read_array_meta(self):
        """Populate the internal graph with the Data Cube structure.

        Adds the dataset node ``<#this>``, one dimension component per array
        axis (``<#dim_0>``, ``<#dim_1>``, ...) and the ``<#value>`` measure,
        and records the dimension names in ``self.dimensions``.
        """
        g = self._g
        arr_node = URIRef('#this')
        arr_struct = BNode()
        g.add((arr_node, RDF.type, QB.Dataset))
        g.add((arr_node, QB.structure, arr_struct))
        g.add((arr_struct, RDF.type, QB.DataStructureDefinition))

        self.dimensions = []
        for axis in range(len(self.arr.shape)):
            dim_name = 'dim_{}'.format(axis)
            dim = BNode(dim_name)
            self.dimensions.append(dim_name)
            g.add((arr_struct, QB.component, dim))
            g.add((dim, QB.dimension, THIS[dim_name]))
            g.add((dim, QB.order, Literal(axis + 1)))
            # abbreviated
            # g.add((THIS[dim_name], RDF.type, RDF.Property))
            # g.add((THIS[dim_name], RDF.type, QB.DimensionProperty))
            g.add((THIS[dim_name], RDFS.range, XSD.integer))
            # TODO: how to explicitly indicate the dimension interval limit.
            # Sketch from the original author (never executed):
            #   interval = BNode()
            #   g.add((THIS[dim_name], RDFS.range, interval))
            #   g.add((interval, RDF.type, OWL.Restriction))
            #   ivalues = BNode()
            #   g.add((interval, OWL.allValuesFrom, ivalues))
            #   g.add((ivalues, RDF.type, RDFS.Datatype))
            #   g.add((ivalues, OWL.onDatatype, XSD.int))
            #   g.add((interval, OWL.onProperty, THIS[dim_name]))
            #   rnode = add_list(g, [
            #       (BNode(), XSD.minInclusive, Literal(0)),
            #       (BNode(), XSD.maxExclusive, Literal(self.arr.shape[axis])),
            #   ])
            #   g.add((ivalues, OWL.withRestrictions, rnode))

        measure = THIS.value
        mnode = BNode()
        g.add((arr_struct, QB.component, mnode))
        g.add((mnode, QB.measure, measure))
        # abbreviated
        # g.add((measure, RDF.type, RDF.Property))
        # g.add((measure, RDF.type, QB.MeasureProperty))
        dtype = self.arr.dtype
        if dtype in (np.uint8, np.uint32, np.int32):
            g.add((measure, RDFS.range, XSD.int))
        elif dtype in (np.float32, np.float64):
            g.add((measure, RDFS.range, XSD.decimal))
        elif np.issubdtype(dtype, np.integer):
            # Other integer widths (e.g. int64) may exceed xsd:int.
            g.add((measure, RDFS.range, XSD.integer))
        elif np.issubdtype(dtype, np.floating):
            g.add((measure, RDFS.range, XSD.decimal))
        # Observation triples are deliberately not materialized here;
        # triples() derives them from the array on demand.

    def _element_index(self, node):
        """Decode the array index encoded in an element URI.

        Returns:
            A tuple of ints, or ``None`` when *node* does not contain the
            ``#e_`` element marker.

        Raises:
            ValueError: the text after the marker is not ``int_int_...``.
            IndexError: wrong number of indices, or an index outside the
                array bounds (negative indices are rejected so that every
                element has exactly one URI).
        """
        _, marker, tail = str(node).partition(self._ELEMENT_MARKER)
        if not marker:
            return None
        index = tuple(int(part) for part in tail.split('_'))
        shape = self.arr.shape
        if len(index) != len(shape):
            raise IndexError(
                'expected {} indices, got {}'.format(len(shape), len(index)))
        for position, extent in zip(index, shape):
            if not 0 <= position < extent:
                raise IndexError(
                    'index {} out of bounds for shape {}'.format(index, shape))
        return index

    def _element_predicates(self):
        """Return the set of predicates an element triple can carry."""
        predicates = {QB.dataset, THIS.value}
        predicates.update(THIS[name] for name in self.dimensions)
        return predicates

    def _element_triples(self, element, index, value):
        """Yield every virtual triple describing one array element."""
        # Unwrap NumPy scalars so the literal gets a proper XSD datatype,
        # consistent with how the dimension positions are emitted.
        if isinstance(value, np.generic):
            value = value.item()
        yield (element, QB.dataset, THIS.this)
        yield (element, THIS.value, Literal(value))
        for name, position in zip(self.dimensions, index):
            yield (element, THIS[name], Literal(position))

    def add(self, triple_pattern, context, quoted=False):
        """Translate a ``(<#e_i_j>, <#value>, literal)`` triple into a write.

        The addressed array cell is set to ``obj.toPython()``.  Any other
        triple is ignored: the store keeps no user triples.

        Raises:
            ValueError / IndexError: the subject looks like an element URI
                but does not address a valid cell of the array.
        """
        subj, pred, obj = triple_pattern
        # Only the measure predicate carries a pixel value; without this
        # check a dimension triple would overwrite the cell with its index.
        if pred != THIS.value:
            return
        index = self._element_index(subj)
        if index is None:
            return
        self.arr[index] = obj.toPython()

    def remove(self, triple_pattern, context=None):
        """Do nothing: triples cannot be removed, only modified via add."""
        pass

    def triples(self, triple_pattern, context=None):
        """Yield ``(triple, contexts)`` pairs matching *triple_pattern*.

        Matches from the metadata graph come first, followed by the virtual
        element triples derived from the array.  ``None`` in any position of
        the pattern is a wildcard.
        """
        subj, pred, obj = triple_pattern
        for triple in self._g.triples(triple_pattern):
            yield triple, self.__contexts()

        # A bound predicate that no element uses cannot match; skip the scan.
        if pred is not None and pred not in self._element_predicates():
            return

        if subj is None:
            # ndenumerate pairs each value with its own index, so NaN cells
            # (where arr == arr is False) are neither dropped nor misaligned.
            candidates = (
                (THIS['e_' + '_'.join(map(str, index))], index, value)
                for index, value in np.ndenumerate(self.arr)
            )
        else:
            try:
                index = self._element_index(subj)
            except (ValueError, IndexError):
                return  # not a valid element of this array: no matches
            if index is None:
                return
            candidates = [(subj, index, self.arr[index])]

        for element, index, value in candidates:
            for triple in self._element_triples(element, index, value):
                if pred is not None and triple[1] != pred:
                    continue
                if obj is not None and triple[2] != obj:
                    continue
                yield triple, self.__contexts()

    def __len__(self, context=None):
        """Return the metadata triple count plus the virtual element triples.

        Each element contributes ``qb:dataset``, ``<#value>`` and one triple
        per array dimension.
        """
        per_element = 2 + len(self.arr.shape)
        return len(self._g) + self.arr.size * per_element

    def bind(self, prefix, namespace, override=True):
        """Associate *prefix* with *namespace*.

        With ``override=False`` an existing binding of either the prefix or
        the namespace is left untouched.
        NOTE(review): ``override`` is meant to mirror the keyword newer
        rdflib releases pass to ``Store.bind`` - confirm against the rdflib
        version in use.
        """
        if not override and (
                prefix in self.__namespace or namespace in self.__prefix):
            return
        self.__prefix[namespace] = prefix
        self.__namespace[prefix] = namespace

    def namespace(self, prefix):
        """Return the namespace bound to *prefix*, or ``None``."""
        return self.__namespace.get(prefix, None)

    def prefix(self, namespace):
        """Return the prefix bound to *namespace*, or ``None``."""
        return self.__prefix.get(namespace, None)

    def namespaces(self):
        """Yield every ``(prefix, namespace)`` binding."""
        for prefix, namespace in self.__namespace.items():
            yield prefix, namespace

    def __contexts(self):
        """Return an empty iterator: this store has no named contexts."""
        return iter(())
def set_value(graph, newval, idx):
    """Assert ``<#e_i_j_...> <#value> newval`` on *graph*.

    Args:
        graph: object exposing ``add(triple)``.
        newval: Python value wrapped in a ``Literal`` as the triple's object.
        idx: iterable of index components; joined with ``_`` to form the
            element URI suffix.
    """
    suffix = '_'.join(str(component) for component in list(idx))
    element = THIS['e_' + suffix]
    graph.add((element, THIS.value, Literal(newval)))
if __name__ == "__main__":
    # Demo: wrap a 2x4 zero array, update one cell through RDF, dump as N3.
    z = np.zeros((2, 4))
    g = Graph(store=RasterStore(z))
    print(len(g))
    # propagate triple addition as array element updating
    set_value(g, 10, (1, 1))
    # serialize as RDF
    g.bind('qb', QB[''].toPython())
    g.bind('owl', OWL[''].toPython())
    serialized = g.serialize(format='n3')
    # Older rdflib returns bytes here, newer releases return str; calling
    # .decode() unconditionally fails on the latter.
    if isinstance(serialized, bytes):
        serialized = serialized.decode('utf-8')
    print(serialized)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
output of the serialization