Created
October 4, 2011 09:53
-
-
Save pudo/1261271 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Proposed OpenSpending data model, v2 */
/* Core issues we want to address:
 *
 * - Merge "entities" and "classifiers".
 * - Use better terminology.
 * - Decide whether to still have value dimensions.
 * - Handle time better (no "end_column")
 *
 * Secondary but long-term goals:
 *
 * - Dimension hierarchies.
 * - Soft typing of dimensions.
 */
/*
 * Example object illustrating the proposed data model: a "dataset" metadata
 * section plus a "mapping" section with one entry per measure / dimension.
 * The tail of "mapping" holds alternative proposals for discussion.
 *
 * `var` is kept on purpose so the binding stays exactly as declared for any
 * script that loads this file.
 */
var model_proposal_fl = {
  /* Dataset metadata */
  "dataset": {
    /* Name. A number of words need to be forbidden for it. */
    "name": "test",
    /* Label, a free-form description. */
    "label": "Example Dataset",
    "description": "I'm an example dataset, not much to see here.",
    /* Set of entry properties sufficient to make each entry unique in
     * the dataset. */
    "unique_keys": ["transaction_id"]
  },
  "mapping": {
    "amount": {
      "label": "Total amount",
      "description": "...",
      /* New type of field in mapping: */
      "type": "measure",
      /* Don't do float for money any longer: */
      "datatype": "decimal",
      "column": "amount"
      /* TODO: Should this have support for default values and
       * constant values? */
    },
    /* Secondary measure: */
    "cofinance_amount": {
      "label": "Co-financed amount",
      "description": "...",
      /* New type of field in mapping: */
      "type": "measure",
      /* Don't do float for money any longer: */
      "datatype": "decimal",
      "column": "cofinance"
    },
    /* Time dimension. Note it is not called "time". Good idea? */
    "grant_date": {
      "label": "Date of grant",
      "description": "...",
      /* Optional: type for field in mapping. */
      "type": "date",
      "datatype": "date",
      /* NOTE
       * This is a hybrid type: the datatype should first yield Python date
       * objects but then we'll store something else to the database: either
       * a DATETIME column or (cleaner) a proper date table with multiple
       * fields:
       *
       *   date_id, year, month, quarter, day
       */
      "column": "date_grant"
      /* NOTE
       * I propose we abolish the "end_column" hack and have multiple date
       * dimensions if necessary: grant_date, disbursed_date, project_end.
       */
    },
    /* Have simple attribute dimensions for non-compound entities with many
     * different values.
     *
     * NOTE(review): the "transaction_id" key is repeated in the ALTERNATIVES
     * section below; in an object literal the later definition replaces this
     * one at runtime. Both are kept so the two proposals can be compared. */
    "transaction_id": {
      "label": "Transaction ID",
      "description": "...",
      "facet": false,
      /* Could also be "attribute": */
      "type": "value",
      "column": "transaction_id",
      /* This is assumed by default: */
      "datatype": "string"
    },
    /* The thing formerly known as an entity: */
    "beneficiary": {
      "label": "Beneficiary",
      "description": "...",
      /* This is assumed as a default, supersedes "type": "entity": */
      "scheme": "entity",
      /* Just an idea, hard to actually enumerate: */
      "classes": ["individual", "company", "nonprofit"],
      "facet": true,
      /* IMO these can also have this - but nick will disagree?
       * (Key spelled "unique_keys" to match the dataset-level key.) */
      "unique_keys": ["label", "country"],
      "attributes": {
        /* Always enforce presence of a name attribute? */
        "name": {
          /* Re-name ID */
          "datatype": "slug",
          "column": "beneficiary"
        },
        "label": {
          "datatype": "string",
          "column": "beneficiary"
        },
        "country": {
          "datatype": "string",
          "column": "beneficiary_country"
        }
      }
    },
    /* The thing formerly known as a classifier: */
    "objective": {
      "label": "Objective (Level 1)",
      "description": "...",
      "scheme": "funding-taxonomy",
      "facet": true,
      "attributes": {
        "name": {
          "datatype": "slug",
          "column": "objective"
        },
        "label": {
          "datatype": "string",
          "column": "objective"
        }
      }
    },
    /* Support hierarchies of classifiers: */
    "goal": {
      "label": "Goal (Level 2)",
      "description": "...",
      "scheme": "funding-taxonomy",
      /* Proposal: let's have the parent classifier given by name and specify
       * a column on this dimension that refers back to the higher-level
       * dimension. The latter is not strictly necessary, I think.
       */
      "parent": {"objective": "objective_name"},
      "attributes": {
        "name": {
          "datatype": "slug",
          "column": "goal"
        },
        "label": {
          "datatype": "string",
          "column": "goal"
        },
        "objective_name": {
          "datatype": "slug",
          "column": "objective"
        }
      }
    },

    /*
     *
     * ALTERNATIVES
     *
     */

    /*
     * mk270: remove "Value Dimensions" completely, have all dimensions with
     * attributes:
     */
    "transaction_id": {
      "label": "Transaction ID",
      "description": "...",
      "facet": false,
      "attributes": [{
        /* Could also be "attribute": */
        "column": "transaction_id",
        /* This is assumed by default: */
        "datatype": "string"
      }]
      /* Criticism:
       * 1) Utility: it's nice and easy to have attributes on entries, although
       *    not strictly clean.
       * 2) Makes nicer hash representation of attributes impossible.
       * 3) How do you enforce there's only one?
       * 4) Given that "value dimensions" can be generated by the same code as
       *    "dimension attributes" - is it really easier to generate this?
       */
    },

    /*
     * pudo: separate "Dimension Model" from "Source file mapping":
     *
     * -> This is one level up, mentally:
     *
     * NOTE(review): by brace matching, "model" and the inner "mapping" below
     * are nested inside the outer "mapping"; presumably they are meant to be
     * read as top-level siblings - confirm with the author.
     */

    /* This is fully abstract, e.g. for all of ERDF, ESF or some such collection
     * of data sets:
     */
    "model": {
      "measure1": { /* Full description. */ },
      "measure2": { /* Full description. */ },
      "dimension1": { /* Full description with attributes. */ },
      "dimension2": { /* Full description with attributes. */ },
      "dimension3": { /* Full description with attributes. */ },
      "dimension4": { /* Full description with attributes. */ }
    },
    /* This is specific to one CSV representation. */
    "mapping": {
      "measure1": {"column": "measure", "datatype": "decimal"},
      "measure2": {"column": "other_measure", "datatype": "decimal"},
      "dimension1.attrib1": {"column": "foo", "datatype": "string"},
      "dimension1.attrib2": {"column": "bar", "datatype": "string"}
    }
  }
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment