Created
July 9, 2015 20:28
-
-
Save zhangce/b1f5adc6d04b082cb053 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // NLP annotations. `sentences` holds array-valued per-word columns; `sentences_serialized` and | |
| // `documents_serialized` carry the same columns flattened to delimited text, and `documents` is | |
| // the per-docid intermediate relation. All but `sentences` are filled by the serialization rules in this file. | |
| sentences(docid text, sentid text, wordidxs text[], words text[], poses text[], ners text[], lemmas text[], dep_paths text[], dep_parents text[], bounding_boxes text[]). | |
| sentences_serialized(docid text, sentid text, wordidxs text, words text, poses text, ners text, lemmas text, dep_paths text, dep_parents text, bounding_boxes text). | |
| documents(docid text, sentids text[], wordidxs text[], words text[], poses text[], ners text[], lemmas text[], dep_paths text[], dep_parents text[], bounding_boxes text[]). | |
| documents_serialized(docid text, sentids text, wordidxs text, words text, poses text, ners text, lemmas text, dep_paths text, dep_parents text, bounding_boxes text). | |
| // Entity candidates. Rules in this file fill the formation/taxon *_local relations and | |
| // entity_temporal_candidate from UDFs run over sentences_serialized, and fill the un-suffixed | |
| // formation/taxon relations from document-level UDFs plus a copy of the *_local rows. | |
| // NOTE(review): no rule visible in this file fills entity_location_candidate. | |
| entity_formation_candidate_local (docid text, type text, eid text, entity text, prov text). | |
| entity_formation_candidate (docid text, type text, eid text, entity text, prov text). | |
| entity_taxon_candidate_local (docid text, type text, eid text, entity text, author_year text, prov text). | |
| entity_taxon_candidate (docid text, type text, eid text, entity text, author_year text, prov text). | |
| entity_location_candidate (docid text, type text, eid text, entity text, prov text). | |
| entity_temporal_candidate (docid text, type text, eid text, entity text, prov text). | |
| // Variable relations (declared with a trailing `?`). The inference rules at the end of this file | |
| // have entity_formation, entity_temporal and entity_taxon in their heads. | |
| // NOTE(review): no inference rule visible in this file targets entity_location. | |
| entity_formation? (docid text, type text, eid text, entity text, prov text). | |
| entity_taxon? (docid text, type text, eid text, entity text, author_year text, prov text). | |
| entity_location? (docid text, type text, eid text, entity text, prov text). | |
| entity_temporal? (docid text, type text, eid text, entity text, prov text). | |
| // Relation-extraction schema: one candidate relation plus `?` variable relations, each declared | |
| // with and without a _global suffix. | |
| // NOTE(review): no rule visible in this file populates or infers any of these relations — confirm | |
| // whether the corresponding rules live elsewhere or are still to be written. | |
| relation_candidates (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text, features text). | |
| relation_formation? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text). | |
| relation_formationtemporal? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text). | |
| relation_formationlocation? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text). | |
| relation_taxonomy? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text). | |
| relation_formation_global? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text). | |
| relation_formationtemporal_global? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text). | |
| relation_formationlocation_global? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text). | |
| relation_taxonomy_global? (docid text, type text, eid1 text, eid2 text, entity1 text, entity2 text). | |
| // Auxiliary relations. formation_per_doc / taxon_per_doc and the two document_with_*_entities | |
| // relations are filled by rules in this file (the latter are the inputs of the global extractor UDFs). | |
| // ddtables and the interval_* relations are only declared here; no rule visible in this file uses them. | |
| ddtables (docid text, tableid text, type text, sentid text). | |
| interval_containments (formation text, child text, parent text). | |
| interval_not_that_possible(formation text, interval1 text, interval2 text). | |
| formation_per_doc(docid text, entity text[], type text[]). | |
| taxon_per_doc(docid text, entity text[], type text[]). | |
| document_with_formation_entities(docid text, entities text, types text, sentids text, wordidxs text, words text, poses text, ners text, lemmas text, dep_paths text, dep_parents text, bounding_boxes text). | |
| document_with_taxon_entities(docid text, entities text, types text, sentids text, wordidxs text, words text, poses text, ners text, lemmas text, dep_paths text, dep_parents text, bounding_boxes text). | |
| /*** | |
| * The following three rules aim at preparing a serialized version of the | |
| * sentence and document from the `sentences` table output by Bazaar. If | |
| * ddlog supported arrays as first-class citizens, the following three rules | |
| * could be further simplified. However, for now, we use an approach similar | |
| * to MapReduce/Hadoop and represent each tuple as a string with user-defined separators. | |
| * Words are separated by @@@@@, and sentences are separated by |||||. | |
| * | |
| * Two relations that will be used later by other rules are `sentences_serialized` | |
| * and `documents_serialized`. ddlib contains functions to deserialize these two | |
| * tables into python object. | |
| * | |
| * NOTE: These three rules only rely on the output of Bazaar, so it could be reused by | |
| * different applications as long as it uses Bazaar to produce the output. | |
| * | |
| * NOTE: The ddlib deserialization function acts as an intermediate layer that decouples the user | |
| * program from Bazaar and the following three serialization rules. Even when Bazaar | |
| * or the following three rules change, as long as ddlib's deserialization | |
| * function does not change, users do not need to change their Python functions | |
| * at all. This kind of decoupling is one of the key goals of the July 2015 refactoring. | |
| * | |
| ***/ | |
| // Flatten every per-word array of a sentence into a single string, joining the elements with @@@@@. | |
| sentences_serialized( | |
| doc_id, | |
| sent_id, | |
| array_to_string(idxs, "@@@@@"), | |
| array_to_string(toks, "@@@@@"), | |
| array_to_string(pos_tags, "@@@@@"), | |
| array_to_string(ner_tags, "@@@@@"), | |
| array_to_string(lems, "@@@@@"), | |
| array_to_string(paths, "@@@@@"), | |
| array_to_string(parents, "@@@@@"), | |
| array_to_string(boxes, "@@@@@")) * | |
| :- sentences(doc_id, sent_id, idxs, toks, pos_tags, ner_tags, lems, paths, parents, boxes). | |
| // Intermediate relation feeding `documents_serialized`: for each docid, gather the | |
| // already-serialized per-sentence strings into arrays. | |
| documents( | |
| doc_id, | |
| ARRAY_AGG(sent_id), | |
| ARRAY_AGG(idxs), | |
| ARRAY_AGG(toks), | |
| ARRAY_AGG(pos_tags), | |
| ARRAY_AGG(ner_tags), | |
| ARRAY_AGG(lems), | |
| ARRAY_AGG(paths), | |
| ARRAY_AGG(parents), | |
| ARRAY_AGG(boxes)) * | |
| :- sentences_serialized(doc_id, sent_id, idxs, toks, pos_tags, ner_tags, lems, paths, parents, boxes). | |
| // Join the per-sentence strings of a document with |||||; inside each sentence the words stay joined with @@@@@. | |
| documents_serialized( | |
| doc_id, | |
| array_to_string(sent_ids, "|||||"), | |
| array_to_string(idxs, "|||||"), | |
| array_to_string(toks, "|||||"), | |
| array_to_string(pos_tags, "|||||"), | |
| array_to_string(ner_tags, "|||||"), | |
| array_to_string(lems, "|||||"), | |
| array_to_string(paths, "|||||"), | |
| array_to_string(parents, "|||||"), | |
| array_to_string(boxes, "|||||")) | |
| :- documents(doc_id, sent_ids, idxs, toks, pos_tags, ner_tags, lems, paths, parents, boxes). | |
| /** | |
| * Formation that can be decided by only looking at the phrase itself. | |
| * | |
| * The UDF takes rows shaped like `sentences_serialized` as TSV lines and returns rows | |
| * shaped like `entity_formation`. Its output is stored in `entity_formation_candidate_local`, | |
| * which is declared with the same columns as `entity_formation`. | |
| **/ | |
| function ext_entity_formation_local over like sentences_serialized | |
| returns like entity_formation | |
| implementation "/udf/ext_formation_local.py" handles tsv lines. | |
| entity_formation_candidate_local :- !ext_entity_formation_local(sentences_serialized). | |
| /** | |
| * Temporal Interval that can be decided by only looking at the phrase itself. | |
| * | |
| * The UDF takes rows shaped like `sentences_serialized` as TSV lines and returns rows | |
| * shaped like `entity_temporal`. Its output is stored directly in `entity_temporal_candidate` | |
| * (there is no separate *_local relation for temporal candidates). | |
| **/ | |
| function ext_entity_temporal_local over like sentences_serialized | |
| returns like entity_temporal | |
| implementation "/udf/ext_temporal_local.py" handles tsv lines. | |
| entity_temporal_candidate :- !ext_entity_temporal_local(sentences_serialized). | |
| /** | |
| * Taxon that can be decided by only looking at the phrase itself. | |
| * | |
| * The UDF takes rows shaped like `sentences_serialized` as TSV lines and returns rows | |
| * shaped like `entity_taxon`. Its output is stored in `entity_taxon_candidate_local`, | |
| * which is declared with the same columns as `entity_taxon`. | |
| **/ | |
| function ext_entity_taxon_local over like sentences_serialized | |
| returns like entity_taxon | |
| implementation "/udf/ext_taxon_local.py" handles tsv lines. | |
| entity_taxon_candidate_local :- !ext_entity_taxon_local(sentences_serialized). | |
| /** | |
| * Collect the sentence-level candidates of each document into two parallel arrays: | |
| * the entity strings and their types. | |
| **/ | |
| formation_per_doc(doc_id, ARRAY_AGG(ent_name), ARRAY_AGG(ent_type)) :- | |
| entity_formation_candidate_local(doc_id, ent_type, ent_id, ent_name, provenance). | |
| taxon_per_doc(doc_id, ARRAY_AGG(ent_name), ARRAY_AGG(ent_type)) :- | |
| entity_taxon_candidate_local(doc_id, ent_type, ent_id, ent_name, authors, provenance). | |
| /** | |
| * Extract global formation entities. | |
| * | |
| * The UDF takes rows shaped like `document_with_formation_entities` (a serialized document | |
| * together with its aggregated local formation candidates) as TSV lines and returns rows | |
| * shaped like `entity_formation`. | |
| **/ | |
| function ext_entity_formation_global over like document_with_formation_entities | |
| returns like entity_formation | |
| implementation "/udf/ext_formation_global.py" handles tsv lines. | |
| // Pair each document's aggregated formation candidates (arrays flattened with @@@@@) | |
| // with the serialized text of the same docid. | |
| document_with_formation_entities(doc_id, | |
| array_to_string(ent_names, "@@@@@"), | |
| array_to_string(ent_types, "@@@@@"), | |
| sent_ids, idxs, toks, pos_tags, ner_tags, lems, paths, parents, boxes) | |
| :- | |
| formation_per_doc(doc_id, ent_names, ent_types), | |
| documents_serialized(doc_id, sent_ids, idxs, toks, pos_tags, ner_tags, lems, paths, parents, boxes). | |
| // Formation candidates produced by the document-level extractor ... | |
| entity_formation_candidate :- !ext_entity_formation_global(document_with_formation_entities). | |
| // ... plus every sentence-level candidate, copied over unchanged. | |
| entity_formation_candidate(doc_id, ent_type, ent_id, ent_name, provenance) :- | |
| entity_formation_candidate_local(doc_id, ent_type, ent_id, ent_name, provenance). | |
| /** | |
| * Extract global taxon entities. | |
| * | |
| * The UDF takes rows shaped like `document_with_taxon_entities` (a serialized document | |
| * together with its aggregated local taxon candidates) as TSV lines and returns rows | |
| * shaped like `entity_taxon`. | |
| **/ | |
| function ext_entity_taxon_global over like document_with_taxon_entities | |
| returns like entity_taxon | |
| implementation "/udf/ext_taxon_global.py" handles tsv lines. | |
| // Pair each document's aggregated taxon candidates (arrays flattened with @@@@@) | |
| // with the serialized text of the same docid. | |
| document_with_taxon_entities(doc_id, | |
| array_to_string(ent_names, "@@@@@"), | |
| array_to_string(ent_types, "@@@@@"), | |
| sent_ids, idxs, toks, pos_tags, ner_tags, lems, paths, parents, boxes) | |
| :- | |
| taxon_per_doc(doc_id, ent_names, ent_types), | |
| documents_serialized(doc_id, sent_ids, idxs, toks, pos_tags, ner_tags, lems, paths, parents, boxes). | |
| // Taxon candidates produced by the document-level extractor ... | |
| entity_taxon_candidate :- !ext_entity_taxon_global(document_with_taxon_entities). | |
| // ... plus every sentence-level candidate, copied over unchanged. | |
| entity_taxon_candidate(doc_id, ent_type, ent_id, ent_name, authors, provenance) :- | |
| entity_taxon_candidate_local(doc_id, ent_type, ent_id, ent_name, authors, provenance). | |
| // Inference rule: each formation candidate implies the matching entity_formation variable; | |
| // the weight is tied to the candidate's prov value. | |
| entity_formation(doc_id, ent_type, ent_id, ent_name, provenance) :- | |
| entity_formation_candidate(doc_id, ent_type, ent_id, ent_name, provenance) | |
| weight = provenance | |
| semantics = Imply. | |
| // Inference rule: each temporal candidate implies the matching entity_temporal variable; | |
| // the weight is tied to the candidate's prov value. | |
| // The body must read entity_temporal_candidate (filled by ext_entity_temporal_local), not the | |
| // formation candidates: otherwise temporal variables would be grounded from formation mentions | |
| // and the temporal extractor's output would never be used. | |
| entity_temporal(docid, type, eid, entity, prov) :- | |
| entity_temporal_candidate(docid, type, eid, entity, prov) | |
| weight = prov | |
| semantics = Imply. | |
| // Inference rule: each taxon candidate implies the matching entity_taxon variable; | |
| // the weight is tied to the candidate's prov value. | |
| entity_taxon(doc_id, ent_type, ent_id, ent_name, authors, provenance) :- | |
| entity_taxon_candidate(doc_id, ent_type, ent_id, ent_name, authors, provenance) | |
| weight = provenance | |
| semantics = Imply. | |
| //has_spouse(rid) :- has_spouse_candidates(a, b, c, d, rid, l) label = l. | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment