pudo · October 4, 2011 09:53
diff --git a/model2.js b/model2.js
 /* Proposed OpenSpending data model, v2 */

 /* Core issues we want to address: 
 *
 * - Merge "entities" and "classifiers".
 * - Use better terminology.
 * - Decide whether to still have value dimensions.
 * - Handle time better (no "end_column")
 *
 * Secondary but long-term goals:
 *
 * - Dimension hierarchies.
 * - Soft typing of dimensions.
 */

 /* Worked example of the proposed v2 data model: dataset metadata, a mapping
 * of measures and dimensions, and the alternative designs under discussion.
 *
 * Kept as `var`: the name is declared more than once in this file, which
 * `let`/`const` would reject. */
 var model_proposal_fl = {
  /* Dataset metadata */
  "dataset": {
    /* Name, needs to have a number of forbidden words. */
    "name": "test",
    /* Label, a free-form description. */
    "label": "Example Dataset",
    "description": "I'm an example dataset, not much to see here.",
    /* Set of entry properties sufficient to make each entry unique in
    * the dataset. */
    "unique_keys": ["transaction_id"]
  },
  "mapping": {
    "amount": {
      "label": "Total amount",
      "description": "...",
      /* New type of field in mapping: */
      "type": "measure",
      /* Don't do float for money any longer: */
      "datatype": "decimal",
      "column": "amount"
      /* TODO: Should this have support for default values and
      * constant values? */
    },
    /* Secondary measure: */
    "cofinance_amount": {
      "label": "Co-financed amount",
      "description": "...",
      /* New type of field in mapping: */
      "type": "measure",
      /* Don't do float for money any longer: */
      "datatype": "decimal",
      "column": "cofinance"
    },
    /* Time dimension. Note it is not called "time". Good idea? */
    "grant_date": {
      "label": "Date of grant",
      "description": "...",
      /* Optional: type for field in mapping. */
      "type": "date",
      "datatype": "date",
      /* NOTE
      * This is a bastard type: the datatype should first yield python date
      * objects but then we'll store something else to the database: either
      * a DATETIME column or (cleaner) a proper date table with multiple
      * fields:
      *
      *   date_id, year, month, quarter, day
      */
      "column": "date_grant"
      /* NOTE
      * I propose we abolish the "end_column" hack and have multiple date
      * dimensions if necessary: grant_date, disbursed_date, project_end.
      */
    },
    /* Have simple attribute dimensions for non-compound entities with many
    * different values: */
    "transaction_id": {
      "label": "Transaction ID",
      "description": "...",
      "facet": false,
      /* Could also be "attribute": */
      "type": "value",
      "column": "transaction_id",
      /* This is assumed by default: */
      "datatype": "string"
    },
    /* The thing formerly known as an entity: */
    "beneficiary": {
      "label": "Beneficiary",
      "description": "...",
      /* This is assumed as a default, supersedes "type": "entity": */
      "scheme": "entity",
      /* Just an idea, hard to actually enumerate: */
      "classes": ["individual", "company", "nonprofit"],
      "facet": true,
      /* IMO these can also have this - but nick will disagree?
      * Spelled like dataset.unique_keys so both levels use the same key. */
      "unique_keys": ["label", "country"],
      "attributes": {
        /* Always enforce presence of a name attribute? */
        "name": {
          /* Re-name ID */
          "datatype": "slug",
          "column": "beneficiary"
        },
        "label": {
          "datatype": "string",
          "column": "beneficiary"
        },
        "country": {
          "datatype": "string",
          "column": "beneficiary_country"
        }
      }
    },
    /* The thing formerly known as a classifier: */
    "objective": {
      "label": "Objective (Level 1)",
      "description": "...",
      "scheme": "funding-taxonomy",
      "facet": true,
      "attributes": {
        "name": {
          "datatype": "slug",
          "column": "objective"
        },
        "label": {
          "datatype": "string",
          "column": "objective"
        }
      }
    },
    /* Support hierarchies of classifiers: */
    "goal": {
      "label": "Goal (Level 2)",
      "description": "...",
      "scheme": "funding-taxonomy",
      /* Proposal: let's have the parent classifier given by name and specify
      * a column on this dimension that refers back to the higher-level
      * dimension. The latter is not strictly necessary, I think.
      */
      "parent": {"objective": "objective_name"},
      "attributes": {
        "name": {
          "datatype": "slug",
          "column": "goal"
        },
        "label": {
          "datatype": "string",
          "column": "goal"
        },
        "objective_name": {
          "datatype": "slug",
          "column": "objective"
        }
      }
    }
  },
  /*
  *
  * ALTERNATIVES
  *
  * Held under their own key: written directly inside "mapping" they would
  * overwrite the "transaction_id" dimension above (duplicate key) and add
  * "model"/"mapping" as if they were dimensions.
  */
  "alternatives": {
    /*
    * mk270: remove "Value Dimensions" completely, have all dimensions with
    * attributes:
    */
    "attribute_only_dimensions": {
      "transaction_id": {
        "label": "Transaction ID",
        "description": "...",
        "facet": false,
        "attributes": [{
          /* Could also be "attribute": */
          "column": "transaction_id",
          /* This is assumed by default: */
          "datatype": "string"
        }]
        /* Criticism:
        * 1) Utility: it's nice and easy to have attributes on entries,
        *    although not strictly clean.
        * 2) Makes nicer hash representation of attributes impossible.
        * 3) How do you enforce there's only one?
        * 4) Given that "value dimensions" can be generated by the same code as
        *    "dimension attributes" - is it really easier to generate this?
        */
      }
    },
    /*
    * pudo: separate "Dimension Model" from "Source file mapping":
    *
    * -> This is one level up, mentally:
    */
    "model_mapping_split": {
      /* This is fully abstract, e.g. for all of ERDF, ESF or some such
      * collection of data sets:
      */
      "model": {
        "measure1": { /* Full description. */ },
        "measure2": { /* Full description. */ },
        "dimension1": { /* Full description with attributes. */ },
        "dimension2": { /* Full description with attributes. */ },
        "dimension3": { /* Full description with attributes. */ },
        "dimension4": { /* Full description with attributes. */ }
      },
      /* This is specific to one CSV representation. */
      "mapping": {
        "measure1": {"column": "measure", "datatype": "decimal"},
        "measure2": {"column": "other_measure", "datatype": "decimal"},
        "dimension1.attrib1": {"column": "foo", "datatype": "string"},
        "dimension1.attrib2": {"column": "bar", "datatype": "string"}
      }
    }
  }
 };
	/* Proposed OpenSpending data model, v2 */

	/* Core issues we want to address:
	*
	* - Merge "entities" and "classifiers".
	* - Use better terminology.
	* - Decide whether to still have value dimensions.
	* - Handle time better (no "end_column")
	*
	* Secondary but long-term goals:
	*
	* - Dimension hierarchies.
	* - Soft typing of dimensions.
	*/

	/* Worked example of the proposed v2 data model: dataset metadata, a mapping
	* of measures and dimensions, and the alternative designs under discussion.
	*
	* Kept as `var`: the name is declared more than once in this file, which
	* `let`/`const` would reject. */
	var model_proposal_fl = {
	  /* Dataset metadata */
	  "dataset": {
	    /* Name, needs to have a number of forbidden words. */
	    "name": "test",
	    /* Label, a free-form description. */
	    "label": "Example Dataset",
	    "description": "I'm an example dataset, not much to see here.",
	    /* Set of entry properties sufficient to make each entry unique in
	    * the dataset. */
	    "unique_keys": ["transaction_id"]
	  },
	  "mapping": {
	    "amount": {
	      "label": "Total amount",
	      "description": "...",
	      /* New type of field in mapping: */
	      "type": "measure",
	      /* Don't do float for money any longer: */
	      "datatype": "decimal",
	      "column": "amount"
	      /* TODO: Should this have support for default values and
	      * constant values? */
	    },
	    /* Secondary measure: */
	    "cofinance_amount": {
	      "label": "Co-financed amount",
	      "description": "...",
	      /* New type of field in mapping: */
	      "type": "measure",
	      /* Don't do float for money any longer: */
	      "datatype": "decimal",
	      "column": "cofinance"
	    },
	    /* Time dimension. Note it is not called "time". Good idea? */
	    "grant_date": {
	      "label": "Date of grant",
	      "description": "...",
	      /* Optional: type for field in mapping. */
	      "type": "date",
	      "datatype": "date",
	      /* NOTE
	      * This is a bastard type: the datatype should first yield python date
	      * objects but then we'll store something else to the database: either
	      * a DATETIME column or (cleaner) a proper date table with multiple
	      * fields:
	      *
	      *   date_id, year, month, quarter, day
	      */
	      "column": "date_grant"
	      /* NOTE
	      * I propose we abolish the "end_column" hack and have multiple date
	      * dimensions if necessary: grant_date, disbursed_date, project_end.
	      */
	    },
	    /* Have simple attribute dimensions for non-compound entities with many
	    * different values: */
	    "transaction_id": {
	      "label": "Transaction ID",
	      "description": "...",
	      "facet": false,
	      /* Could also be "attribute": */
	      "type": "value",
	      "column": "transaction_id",
	      /* This is assumed by default: */
	      "datatype": "string"
	    },
	    /* The thing formerly known as an entity: */
	    "beneficiary": {
	      "label": "Beneficiary",
	      "description": "...",
	      /* This is assumed as a default, supersedes "type": "entity": */
	      "scheme": "entity",
	      /* Just an idea, hard to actually enumerate: */
	      "classes": ["individual", "company", "nonprofit"],
	      "facet": true,
	      /* IMO these can also have this - but nick will disagree?
	      * Spelled like dataset.unique_keys so both levels use the same key. */
	      "unique_keys": ["label", "country"],
	      "attributes": {
	        /* Always enforce presence of a name attribute? */
	        "name": {
	          /* Re-name ID */
	          "datatype": "slug",
	          "column": "beneficiary"
	        },
	        "label": {
	          "datatype": "string",
	          "column": "beneficiary"
	        },
	        "country": {
	          "datatype": "string",
	          "column": "beneficiary_country"
	        }
	      }
	    },
	    /* The thing formerly known as a classifier: */
	    "objective": {
	      "label": "Objective (Level 1)",
	      "description": "...",
	      "scheme": "funding-taxonomy",
	      "facet": true,
	      "attributes": {
	        "name": {
	          "datatype": "slug",
	          "column": "objective"
	        },
	        "label": {
	          "datatype": "string",
	          "column": "objective"
	        }
	      }
	    },
	    /* Support hierarchies of classifiers: */
	    "goal": {
	      "label": "Goal (Level 2)",
	      "description": "...",
	      "scheme": "funding-taxonomy",
	      /* Proposal: let's have the parent classifier given by name and specify
	      * a column on this dimension that refers back to the higher-level
	      * dimension. The latter is not strictly necessary, I think.
	      */
	      "parent": {"objective": "objective_name"},
	      "attributes": {
	        "name": {
	          "datatype": "slug",
	          "column": "goal"
	        },
	        "label": {
	          "datatype": "string",
	          "column": "goal"
	        },
	        "objective_name": {
	          "datatype": "slug",
	          "column": "objective"
	        }
	      }
	    }
	  },
	  /*
	  *
	  * ALTERNATIVES
	  *
	  * Held under their own key: written directly inside "mapping" they would
	  * overwrite the "transaction_id" dimension above (duplicate key) and add
	  * "model"/"mapping" as if they were dimensions.
	  */
	  "alternatives": {
	    /*
	    * mk270: remove "Value Dimensions" completely, have all dimensions with
	    * attributes:
	    */
	    "attribute_only_dimensions": {
	      "transaction_id": {
	        "label": "Transaction ID",
	        "description": "...",
	        "facet": false,
	        "attributes": [{
	          /* Could also be "attribute": */
	          "column": "transaction_id",
	          /* This is assumed by default: */
	          "datatype": "string"
	        }]
	        /* Criticism:
	        * 1) Utility: it's nice and easy to have attributes on entries,
	        *    although not strictly clean.
	        * 2) Makes nicer hash representation of attributes impossible.
	        * 3) How do you enforce there's only one?
	        * 4) Given that "value dimensions" can be generated by the same code as
	        *    "dimension attributes" - is it really easier to generate this?
	        */
	      }
	    },
	    /*
	    * pudo: separate "Dimension Model" from "Source file mapping":
	    *
	    * -> This is one level up, mentally:
	    */
	    "model_mapping_split": {
	      /* This is fully abstract, e.g. for all of ERDF, ESF or some such
	      * collection of data sets:
	      */
	      "model": {
	        "measure1": { /* Full description. */ },
	        "measure2": { /* Full description. */ },
	        "dimension1": { /* Full description with attributes. */ },
	        "dimension2": { /* Full description with attributes. */ },
	        "dimension3": { /* Full description with attributes. */ },
	        "dimension4": { /* Full description with attributes. */ }
	      },
	      /* This is specific to one CSV representation. */
	      "mapping": {
	        "measure1": {"column": "measure", "datatype": "decimal"},
	        "measure2": {"column": "other_measure", "datatype": "decimal"},
	        "dimension1.attrib1": {"column": "foo", "datatype": "string"},
	        "dimension1.attrib2": {"column": "bar", "datatype": "string"}
	      }
	    }
	  }
	};