zhangce · July 9, 2015 20:28
diff --git a/gistfile1.txt b/gistfile1.txt

 // Per-sentence input table, keyed by (docid, sentid). Every text[] column is
 // an array, presumably with one element per word -- TODO confirm against the
 // upstream parser output.
 sentences(docid text, sentid text, wordidxs text[], words text[], poses text[], ners text[], lemmas text[], dep_paths text[], dep_parents text[], bounding_boxes text[]).

 // String form of `sentences`: the sentences_serialized rule joins each array
 // column into a single text value.
 sentences_serialized(docid text, sentid text, wordidxs text, words text, poses text, ners text, lemmas text, dep_paths text, dep_parents text, bounding_boxes text).

 // Intermediate per-document table: the documents rule collects the serialized
 // sentence columns into arrays.
 documents(docid text, sentids text[], wordidxs text[], words text[], poses text[], ners text[], lemmas text[], dep_paths text[], dep_parents text[], bounding_boxes text[]).

 // String form of `documents`: each array is joined into one text value. This
 // is the document text that the document_with_*_entities rules read.
 documents_serialized(docid text, sentids text, wordidxs text, words text, poses text, ners text, lemmas text, dep_paths text, dep_parents text, bounding_boxes text).

 // Formation candidates written by the sentence-level extractor
 // ext_entity_formation_local.
 entity_formation_candidate_local (docid text, type text, eid text, entity text, prov text).

 // All formation candidates: rows from ext_entity_formation_global plus a copy
 // of every row of entity_formation_candidate_local.
 entity_formation_candidate (docid text, type text, eid text, entity text, prov text).

 // Taxon candidates written by the sentence-level extractor
 // ext_entity_taxon_local. Same layout as the formation tables plus author_year.
 entity_taxon_candidate_local (docid text, type text, eid text, entity text, author_year text, prov text).

 // All taxon candidates: rows from ext_entity_taxon_global plus a copy of every
 // row of entity_taxon_candidate_local.
 entity_taxon_candidate (docid text, type text, eid text, entity text, author_year text, prov text).

 // Location candidates. No rule visible in this file fills this table.
 entity_location_candidate (docid text, type text, eid text, entity text, prov text).

 // Temporal-interval candidates written by ext_entity_temporal_local.
 entity_temporal_candidate (docid text, type text, eid text, entity text, prov text).

 // Variable relations (the trailing `?` marks a relation whose rows are random
 // variables for inference). The inference rules at the end of the program
 // derive them from the candidate tables.
 entity_formation? (docid text, type text, eid text, entity text, prov text).

 entity_taxon? (docid text, type text, eid text, entity text, author_year text, prov text).

 // No inference rule visible in this file has entity_location as its head.
 entity_location? (docid text, type text, eid text, entity text, prov text).

 entity_temporal? (docid text, type text, eid text, entity text, prov text).

 // Relation-level tables. Each row names a pair of entities (eid1/entity1 and
 // eid2/entity2) inside one document. None of the relations in this group is
 // read or written by a rule visible in this file.
 relation_candidates (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text, features text).

 // Variable relations (`?`) for the relation types.
 relation_formation? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text).

 relation_formationtemporal? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text).

 relation_formationlocation? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text).

 relation_taxonomy? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text).

 // `_global` variants of the four variable relations above, with identical
 // columns. NOTE(review): presumably the document-level counterparts of the
 // relations above -- confirm with the rules that populate them.
 relation_formation_global? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text).

 relation_formationtemporal_global? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text).

 relation_formationlocation_global? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text).

 relation_taxonomy_global? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text).

 // The next three tables are declared here but not used by any rule visible
 // in this file.
 ddtables (docid text, tableid text, type text, sentid text).

 interval_containments (formation text, child text, parent text).

 interval_not_that_possible(formation text, interval1 text, interval2 text).

 // Per-document arrays of the sentence-level formation candidates (entity
 // names and their types), built with ARRAY_AGG by the formation_per_doc rule.
 formation_per_doc(docid text, entity text[], type text[]).

 // Same as formation_per_doc, for taxon candidates.
 taxon_per_doc(docid text, entity text[], type text[]).

 // Input of ext_entity_formation_global: the columns of documents_serialized
 // preceded by the document's formation entities and types, each flattened
 // to one string.
 document_with_formation_entities(docid text, entities text, types text, sentids text, wordidxs text, words text, poses text, ners text, lemmas text, dep_paths text, dep_parents text, bounding_boxes text).

 // Input of ext_entity_taxon_global; same layout, with taxon entities.
 document_with_taxon_entities(docid text, entities text, types text, sentids text, wordidxs text, words text, poses text, ners text, lemmas text, dep_paths text, dep_parents text, bounding_boxes text).


 /***
 * The following three rules aim at preparing a serialized version of the 
 * sentence and document from the `sentences` table output by Bazaar. If 
 * ddlog could support arrays as first-class citizens, the following three rules
 * could be further simplified. However, for now, we use an approach similar
 * to MapReduce/Hadoop and represent each tuple as a string with user-defined separators.
 * Words are separated by @@@@@, and sentences are separated by |||||.
 *
 * The two relations that will be used later by other rules are `sentences_serialized`
 * and `documents_serialized`. ddlib contains functions to deserialize these two
 * tables into Python objects.
 * 
 * NOTE: These three rules only rely on the output of Bazaar, so it could be reused by
 * different applications as long as it uses Bazaar to produce the output. 
 *
 * NOTE: The ddlib deserialization function acts as the intermediate layer that decouples the user
 * program from Bazaar and the following three serialization rules. Even when Bazaar
 * or the following three rules change, as long as ddlib's deserialization
 * function does not change, users do not need to change their Python functions
 * at all. This type of decoupling is one of the key goals of the July 2015 refactoring.
 *
 ***/

 /**
 * Sentence-level serialization: every array column of `sentences` is joined
 * into one string with @@@@@ between consecutive elements (words). docid and
 * sentid pass through unchanged.
 * The `*` in front of `:-` presumably asks for distinct rows only - confirm
 * against the DDlog version in use.
 **/
 sentences_serialized(doc_id, sent_id,
     array_to_string(idx_arr,    "@@@@@"),
     array_to_string(word_arr,   "@@@@@"),
     array_to_string(pos_arr,    "@@@@@"),
     array_to_string(ner_arr,    "@@@@@"),
     array_to_string(lemma_arr,  "@@@@@"),
     array_to_string(path_arr,   "@@@@@"),
     array_to_string(parent_arr, "@@@@@"),
     array_to_string(box_arr,    "@@@@@")
 ) * :-
     sentences(doc_id, sent_id, idx_arr, word_arr, pos_arr, ner_arr,
               lemma_arr, path_arr, parent_arr, box_arr).

 /**
 * Intermediate table used to build `documents_serialized`: for each docid,
 * every serialized sentence column is collected into an array with ARRAY_AGG.
 * NOTE(review): no ordering is given for the aggregation, so the order of the
 * sentences inside each array is presumably whatever order the database
 * returns the rows in - confirm that consumers do not depend on it.
 **/
 documents(doc_id,
     ARRAY_AGG(sent_id),
     ARRAY_AGG(idx_str),  ARRAY_AGG(word_str),  ARRAY_AGG(pos_str),
     ARRAY_AGG(ner_str),  ARRAY_AGG(lemma_str), ARRAY_AGG(path_str),
     ARRAY_AGG(parent_str), ARRAY_AGG(box_str)
 ) * :-
     sentences_serialized(doc_id, sent_id, idx_str, word_str, pos_str, ner_str,
                          lemma_str, path_str, parent_str, box_str).


 /**
 * Document-level serialization: each per-document array is joined into one
 * string with ||||| between sentences. Inside a sentence the words are still
 * separated by @@@@@, so a consumer splits on ||||| first and on @@@@@ second.
 **/
 documents_serialized(doc_id,
     array_to_string(sent_ids,    "|||||"),
     array_to_string(idx_strs,    "|||||"),
     array_to_string(word_strs,   "|||||"),
     array_to_string(pos_strs,    "|||||"),
     array_to_string(ner_strs,    "|||||"),
     array_to_string(lemma_strs,  "|||||"),
     array_to_string(path_strs,   "|||||"),
     array_to_string(parent_strs, "|||||"),
     array_to_string(box_strs,    "|||||")
 ) :-
     documents(doc_id, sent_ids, idx_strs, word_strs, pos_strs, ner_strs,
               lemma_strs, path_strs, parent_strs, box_strs).

 // Sentence-level ("local") formation extractor: formations that can be
 // recognised from the phrase alone. The UDF reads rows shaped like
 // sentences_serialized as TSV lines; its output fills
 // entity_formation_candidate_local.
 // NOTE(review): the return schema is taken from the variable relation
 // entity_formation, whose columns match the candidate table - confirm this
 // is intended rather than `like entity_formation_candidate_local`.
 function ext_entity_formation_local
     over like sentences_serialized
     returns like entity_formation
     implementation "/udf/ext_formation_local.py" handles tsv lines.

 entity_formation_candidate_local :-
     !ext_entity_formation_local(sentences_serialized).

 // Sentence-level ("local") temporal-interval extractor: intervals that can
 // be recognised from the phrase alone. The UDF reads rows shaped like
 // sentences_serialized as TSV lines; its output fills
 // entity_temporal_candidate.
 function ext_entity_temporal_local
     over like sentences_serialized
     returns like entity_temporal
     implementation "/udf/ext_temporal_local.py" handles tsv lines.

 entity_temporal_candidate :-
     !ext_entity_temporal_local(sentences_serialized).

 // Sentence-level ("local") taxon extractor: taxa that can be recognised from
 // the phrase alone. The UDF reads rows shaped like sentences_serialized as
 // TSV lines; its output fills entity_taxon_candidate_local.
 function ext_entity_taxon_local
     over like sentences_serialized
     returns like entity_taxon
     implementation "/udf/ext_taxon_local.py" handles tsv lines.

 entity_taxon_candidate_local :-
     !ext_entity_taxon_local(sentences_serialized).

 // Roll the sentence-level candidates up to one row per document: the entity
 // names and their types are collected into two arrays with ARRAY_AGG. These
 // arrays are what the document-level extractors receive.
 // eid and prov (and author_year for taxa) are matched but not carried over.
 formation_per_doc(docid, ARRAY_AGG(entity), ARRAY_AGG(type))
     :- entity_formation_candidate_local(docid, type, eid, entity, prov).

 taxon_per_doc(docid, ARRAY_AGG(entity), ARRAY_AGG(type))
     :- entity_taxon_candidate_local(docid, type, eid, entity, author_year, prov).


 /**
 * Document-level ("global") formation extraction, in three steps:
 *
 * 1. document_with_formation_entities pairs each document's serialized text
 *    with the formation candidates already found in it; the entity names
 *    and the types are each joined with @@@@@.
 * 2. ext_entity_formation_global reads that table as TSV lines and its
 *    output rows go into entity_formation_candidate.
 * 3. Every sentence-level candidate is copied into
 *    entity_formation_candidate too, so the table holds both kinds.
 **/
 function ext_entity_formation_global
     over like document_with_formation_entities
     returns like entity_formation
     implementation "/udf/ext_formation_global.py" handles tsv lines.

 document_with_formation_entities(
     docid,
     array_to_string(entities, "@@@@@"), array_to_string(types, "@@@@@"),
     sentids, wordidxs, words, poses, ners,
     lemmas, dep_paths, dep_parents, bounding_boxes
 ) :-
     formation_per_doc(docid, entities, types),
     documents_serialized(docid, sentids, wordidxs, words, poses, ners,
                          lemmas, dep_paths, dep_parents, bounding_boxes).

 entity_formation_candidate :-
     !ext_entity_formation_global(document_with_formation_entities).

 entity_formation_candidate(docid, type, eid, entity, prov)
     :- entity_formation_candidate_local(docid, type, eid, entity, prov).

 /**
 * Document-level ("global") taxon extraction, in three steps:
 *
 * 1. document_with_taxon_entities pairs each document's serialized text with
 *    the taxon candidates already found in it; the entity names and the
 *    types are each joined with @@@@@.
 * 2. ext_entity_taxon_global reads that table as TSV lines and its output
 *    rows go into entity_taxon_candidate.
 * 3. Every sentence-level candidate is copied into entity_taxon_candidate
 *    too, so the table holds both kinds.
 **/
 function ext_entity_taxon_global
     over like document_with_taxon_entities
     returns like entity_taxon
     implementation "/udf/ext_taxon_global.py" handles tsv lines.

 document_with_taxon_entities(
     docid,
     array_to_string(entities, "@@@@@"), array_to_string(types, "@@@@@"),
     sentids, wordidxs, words, poses, ners,
     lemmas, dep_paths, dep_parents, bounding_boxes
 ) :-
     taxon_per_doc(docid, entities, types),
     documents_serialized(docid, sentids, wordidxs, words, poses, ners,
                          lemmas, dep_paths, dep_parents, bounding_boxes).

 entity_taxon_candidate :-
     !ext_entity_taxon_global(document_with_taxon_entities).

 entity_taxon_candidate(docid, type, eid, entity, author_year, prov)
     :- entity_taxon_candidate_local(docid, type, eid, entity, author_year, prov).


 // Inference rule: every formation candidate implies the entity_formation
 // variable with the same columns. The weight is tied to the `prov` column;
 // presumably one weight is learned per distinct prov value - confirm.
 entity_formation(docid, type, eid, entity, prov)
     :- entity_formation_candidate(docid, type, eid, entity, prov)
     weight = prov  semantics = Imply.

 // Inference rule: every temporal-interval candidate implies the
 // entity_temporal variable with the same columns. The weight is tied to the
 // `prov` column.
 // The body reads entity_temporal_candidate, the table filled by
 // ext_entity_temporal_local. Reading the formation candidates here would turn
 // every formation mention into a temporal variable and leave the temporal
 // candidates unused.
 entity_temporal(docid, type, eid, entity, prov) :-
  entity_temporal_candidate(docid, type, eid, entity, prov)
  weight = prov
  semantics = Imply.

 // Inference rule: every taxon candidate implies the entity_taxon variable
 // with the same columns. The weight is tied to the `prov` column; presumably
 // one weight is learned per distinct prov value - confirm.
 entity_taxon(docid, type, eid, entity, author_year, prov)
     :- entity_taxon_candidate(docid, type, eid, entity, author_year, prov)
     weight = prov  semantics = Imply.


 //has_spouse(rid) :- has_spouse_candidates(a, b, c, d, rid, l) label = l.

	// NOTE(review): this file contains the whole program twice; the declarations
	// from here on repeat the ones at the top of the file.
	// Per-sentence input table, keyed by (docid, sentid). Every text[] column is
	// an array, presumably with one element per word -- TODO confirm.
	sentences(docid text, sentid text, wordidxs text[], words text[], poses text[], ners text[], lemmas text[], dep_paths text[], dep_parents text[], bounding_boxes text[]).

	// String form of `sentences`: each array column joined into one text value.
	sentences_serialized(docid text, sentid text, wordidxs text, words text, poses text, ners text, lemmas text, dep_paths text, dep_parents text, bounding_boxes text).

	// Intermediate per-document table: serialized sentence columns collected
	// into arrays.
	documents(docid text, sentids text[], wordidxs text[], words text[], poses text[], ners text[], lemmas text[], dep_paths text[], dep_parents text[], bounding_boxes text[]).

	// String form of `documents`: each array joined into one text value.
	documents_serialized(docid text, sentids text, wordidxs text, words text, poses text, ners text, lemmas text, dep_paths text, dep_parents text, bounding_boxes text).

	// Formation candidates written by ext_entity_formation_local.
	entity_formation_candidate_local (docid text, type text, eid text, entity text, prov text).

	// All formation candidates: output of ext_entity_formation_global plus a
	// copy of every local candidate.
	entity_formation_candidate (docid text, type text, eid text, entity text, prov text).

	// Taxon candidates written by ext_entity_taxon_local.
	entity_taxon_candidate_local (docid text, type text, eid text, entity text, author_year text, prov text).

	// All taxon candidates: output of ext_entity_taxon_global plus a copy of
	// every local candidate.
	entity_taxon_candidate (docid text, type text, eid text, entity text, author_year text, prov text).

	// Location candidates. No rule visible in this file fills this table.
	entity_location_candidate (docid text, type text, eid text, entity text, prov text).

	// Temporal-interval candidates written by ext_entity_temporal_local.
	entity_temporal_candidate (docid text, type text, eid text, entity text, prov text).

	// Variable relations (the trailing `?` marks a relation whose rows are
	// random variables for inference); derived from the candidate tables by the
	// inference rules at the end of the program.
	entity_formation? (docid text, type text, eid text, entity text, prov text).

	entity_taxon? (docid text, type text, eid text, entity text, author_year text, prov text).

	// No inference rule visible in this file has entity_location as its head.
	entity_location? (docid text, type text, eid text, entity text, prov text).

	entity_temporal? (docid text, type text, eid text, entity text, prov text).

	// Relation-level tables: each row names a pair of entities (eid1/entity1 and
	// eid2/entity2) inside one document. None of them is read or written by a
	// rule visible in this file.
	relation_candidates (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text, features text).

	// Variable relations (`?`) for the relation types.
	relation_formation? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text).

	relation_formationtemporal? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text).

	relation_formationlocation? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text).

	relation_taxonomy? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text).

	// `_global` variants with identical columns. NOTE(review): presumably the
	// document-level counterparts of the relations above -- confirm.
	relation_formation_global? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text).

	relation_formationtemporal_global? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text).

	relation_formationlocation_global? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text).

	relation_taxonomy_global? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text).

	// The next three tables are declared here but not used by any rule visible
	// in this file.
	ddtables (docid text, tableid text, type text, sentid text).

	interval_containments (formation text, child text, parent text).

	interval_not_that_possible(formation text, interval1 text, interval2 text).

	// Per-document arrays of the sentence-level formation candidates (entity
	// names and their types), built with ARRAY_AGG.
	formation_per_doc(docid text, entity text[], type text[]).

	// Same as formation_per_doc, for taxon candidates.
	taxon_per_doc(docid text, entity text[], type text[]).

	// Input of ext_entity_formation_global: the columns of documents_serialized
	// preceded by the document's formation entities and types as strings.
	document_with_formation_entities(docid text, entities text, types text, sentids text, wordidxs text, words text, poses text, ners text, lemmas text, dep_paths text, dep_parents text, bounding_boxes text).

	// Input of ext_entity_taxon_global; same layout, with taxon entities.
	document_with_taxon_entities(docid text, entities text, types text, sentids text, wordidxs text, words text, poses text, ners text, lemmas text, dep_paths text, dep_parents text, bounding_boxes text).


	/***
	* The following three rules aim at preparing a serialized version of the
	* sentence and document from the `sentences` table output by Bazaar. If
	* ddlog could support arrays as first-class citizens, the following three rules
	* could be further simplified. However, for now, we use an approach similar
	* to MapReduce/Hadoop and represent each tuple as a string with user-defined separators.
	* Words are separated by @@@@@, and sentences are separated by |||||.
	*
	* The two relations that will be used later by other rules are `sentences_serialized`
	* and `documents_serialized`. ddlib contains functions to deserialize these two
	* tables into Python objects.
	*
	* NOTE: These three rules only rely on the output of Bazaar, so it could be reused by
	* different applications as long as it uses Bazaar to produce the output.
	*
	* NOTE: The ddlib deserialization function acts as the intermediate layer that decouples the user
	* program from Bazaar and the following three serialization rules. Even when Bazaar
	* or the following three rules change, as long as ddlib's deserialization
	* function does not change, users do not need to change their Python functions
	* at all. This type of decoupling is one of the key goals of the July 2015 refactoring.
	*
	***/

	/**
	* Sentence-level serialization: every array column of `sentences` is joined
	* into one string with @@@@@ between consecutive elements (words). docid and
	* sentid pass through unchanged.
	* The `*` in front of `:-` presumably asks for distinct rows only - confirm
	* against the DDlog version in use.
	**/
	sentences_serialized(doc_id, sent_id,
	    array_to_string(idx_arr,    "@@@@@"),
	    array_to_string(word_arr,   "@@@@@"),
	    array_to_string(pos_arr,    "@@@@@"),
	    array_to_string(ner_arr,    "@@@@@"),
	    array_to_string(lemma_arr,  "@@@@@"),
	    array_to_string(path_arr,   "@@@@@"),
	    array_to_string(parent_arr, "@@@@@"),
	    array_to_string(box_arr,    "@@@@@")
	) * :-
	    sentences(doc_id, sent_id, idx_arr, word_arr, pos_arr, ner_arr,
	              lemma_arr, path_arr, parent_arr, box_arr).

	/**
	* Intermediate table used to build `documents_serialized`: for each docid,
	* every serialized sentence column is collected into an array with ARRAY_AGG.
	* NOTE(review): no ordering is given for the aggregation, so the order of the
	* sentences inside each array is presumably whatever order the database
	* returns the rows in - confirm that consumers do not depend on it.
	**/
	documents(doc_id,
	    ARRAY_AGG(sent_id),
	    ARRAY_AGG(idx_str),  ARRAY_AGG(word_str),  ARRAY_AGG(pos_str),
	    ARRAY_AGG(ner_str),  ARRAY_AGG(lemma_str), ARRAY_AGG(path_str),
	    ARRAY_AGG(parent_str), ARRAY_AGG(box_str)
	) * :-
	    sentences_serialized(doc_id, sent_id, idx_str, word_str, pos_str, ner_str,
	                         lemma_str, path_str, parent_str, box_str).


	/**
	* Document-level serialization: each per-document array is joined into one
	* string with ||||| between sentences. Inside a sentence the words are still
	* separated by @@@@@, so a consumer splits on ||||| first and on @@@@@ second.
	*
	* The separator literal is exactly five pipe characters with no backslashes.
	* A backslash before each pipe is markup-escaping residue and would change
	* the delimiter that ends up in the data.
	**/
	documents_serialized(
	docid,
	array_to_string(sentids, "|||||"),
	array_to_string(wordidxs, "|||||"),
	array_to_string(words, "|||||"),
	array_to_string(poses, "|||||"),
	array_to_string(ners, "|||||"),
	array_to_string(lemmas, "|||||"),
	array_to_string(dep_paths, "|||||"),
	array_to_string(dep_parents, "|||||"),
	array_to_string(bounding_boxes, "|||||"))
	:- documents(docid, sentids, wordidxs, words, poses, ners, lemmas, dep_paths, dep_parents, bounding_boxes).

	// Sentence-level ("local") formation extractor: formations that can be
	// recognised from the phrase alone. The UDF reads rows shaped like
	// sentences_serialized as TSV lines; its output fills
	// entity_formation_candidate_local.
	// NOTE(review): the return schema is taken from the variable relation
	// entity_formation, whose columns match the candidate table - confirm this
	// is intended rather than `like entity_formation_candidate_local`.
	function ext_entity_formation_local
	    over like sentences_serialized
	    returns like entity_formation
	    implementation "/udf/ext_formation_local.py" handles tsv lines.

	entity_formation_candidate_local :-
	    !ext_entity_formation_local(sentences_serialized).

	// Sentence-level ("local") temporal-interval extractor: intervals that can
	// be recognised from the phrase alone. The UDF reads rows shaped like
	// sentences_serialized as TSV lines; its output fills
	// entity_temporal_candidate.
	function ext_entity_temporal_local
	    over like sentences_serialized
	    returns like entity_temporal
	    implementation "/udf/ext_temporal_local.py" handles tsv lines.

	entity_temporal_candidate :-
	    !ext_entity_temporal_local(sentences_serialized).

	// Sentence-level ("local") taxon extractor: taxa that can be recognised from
	// the phrase alone. The UDF reads rows shaped like sentences_serialized as
	// TSV lines; its output fills entity_taxon_candidate_local.
	function ext_entity_taxon_local
	    over like sentences_serialized
	    returns like entity_taxon
	    implementation "/udf/ext_taxon_local.py" handles tsv lines.

	entity_taxon_candidate_local :-
	    !ext_entity_taxon_local(sentences_serialized).

	// Roll the sentence-level candidates up to one row per document: the entity
	// names and their types are collected into two arrays with ARRAY_AGG. These
	// arrays are what the document-level extractors receive.
	// eid and prov (and author_year for taxa) are matched but not carried over.
	formation_per_doc(docid, ARRAY_AGG(entity), ARRAY_AGG(type))
	    :- entity_formation_candidate_local(docid, type, eid, entity, prov).

	taxon_per_doc(docid, ARRAY_AGG(entity), ARRAY_AGG(type))
	    :- entity_taxon_candidate_local(docid, type, eid, entity, author_year, prov).


	/**
	* Document-level ("global") formation extraction, in three steps:
	*
	* 1. document_with_formation_entities pairs each document's serialized text
	*    with the formation candidates already found in it; the entity names
	*    and the types are each joined with @@@@@.
	* 2. ext_entity_formation_global reads that table as TSV lines and its
	*    output rows go into entity_formation_candidate.
	* 3. Every sentence-level candidate is copied into
	*    entity_formation_candidate too, so the table holds both kinds.
	**/
	function ext_entity_formation_global
	    over like document_with_formation_entities
	    returns like entity_formation
	    implementation "/udf/ext_formation_global.py" handles tsv lines.

	document_with_formation_entities(
	    docid,
	    array_to_string(entities, "@@@@@"), array_to_string(types, "@@@@@"),
	    sentids, wordidxs, words, poses, ners,
	    lemmas, dep_paths, dep_parents, bounding_boxes
	) :-
	    formation_per_doc(docid, entities, types),
	    documents_serialized(docid, sentids, wordidxs, words, poses, ners,
	                         lemmas, dep_paths, dep_parents, bounding_boxes).

	entity_formation_candidate :-
	    !ext_entity_formation_global(document_with_formation_entities).

	entity_formation_candidate(docid, type, eid, entity, prov)
	    :- entity_formation_candidate_local(docid, type, eid, entity, prov).

	/**
	* Document-level ("global") taxon extraction, in three steps:
	*
	* 1. document_with_taxon_entities pairs each document's serialized text with
	*    the taxon candidates already found in it; the entity names and the
	*    types are each joined with @@@@@.
	* 2. ext_entity_taxon_global reads that table as TSV lines and its output
	*    rows go into entity_taxon_candidate.
	* 3. Every sentence-level candidate is copied into entity_taxon_candidate
	*    too, so the table holds both kinds.
	**/
	function ext_entity_taxon_global
	    over like document_with_taxon_entities
	    returns like entity_taxon
	    implementation "/udf/ext_taxon_global.py" handles tsv lines.

	document_with_taxon_entities(
	    docid,
	    array_to_string(entities, "@@@@@"), array_to_string(types, "@@@@@"),
	    sentids, wordidxs, words, poses, ners,
	    lemmas, dep_paths, dep_parents, bounding_boxes
	) :-
	    taxon_per_doc(docid, entities, types),
	    documents_serialized(docid, sentids, wordidxs, words, poses, ners,
	                         lemmas, dep_paths, dep_parents, bounding_boxes).

	entity_taxon_candidate :-
	    !ext_entity_taxon_global(document_with_taxon_entities).

	entity_taxon_candidate(docid, type, eid, entity, author_year, prov)
	    :- entity_taxon_candidate_local(docid, type, eid, entity, author_year, prov).


	// Inference rule: every formation candidate implies the entity_formation
	// variable with the same columns. The weight is tied to the `prov` column;
	// presumably one weight is learned per distinct prov value - confirm.
	entity_formation(docid, type, eid, entity, prov)
	    :- entity_formation_candidate(docid, type, eid, entity, prov)
	    weight = prov  semantics = Imply.

	// Inference rule: every temporal-interval candidate implies the
	// entity_temporal variable with the same columns. The weight is tied to the
	// `prov` column.
	// The body reads entity_temporal_candidate, the table filled by
	// ext_entity_temporal_local. Reading the formation candidates here would turn
	// every formation mention into a temporal variable and leave the temporal
	// candidates unused.
	entity_temporal(docid, type, eid, entity, prov) :-
	entity_temporal_candidate(docid, type, eid, entity, prov)
	weight = prov
	semantics = Imply.

	// Inference rule: every taxon candidate implies the entity_taxon variable
	// with the same columns. The weight is tied to the `prov` column; presumably
	// one weight is learned per distinct prov value - confirm.
	entity_taxon(docid, type, eid, entity, author_year, prov)
	    :- entity_taxon_candidate(docid, type, eid, entity, author_year, prov)
	    weight = prov  semantics = Imply.


	//has_spouse(rid) :- has_spouse_candidates(a, b, c, d, rid, l) label = l.
No results found