This describes a single variable within the variables section.
Note: The interpretation and tworavens sections are placeholders in this version.
{
  "type": "object",
  "patternProperties": {
    "^[_a-zA-Z0-9]+$": {
      "type": "object",
      "properties": {
        "variableName": {
          "type": "string"
        },
        "description": {
          "type": "string",
          "description": "Brief explanation of the variable"
        },
        "numchar": {
          "type": "string",
          "description": "Describes the variable as numeric or character valued",
          "enum": ["character", "numeric"]
        },
        "nature": {
          "type": "string",
          "description": "Describes the classification of data into Nominal, Ordinal, Ratio, Interval, Percentage, or Other.",
          "enum": ["interval", "nominal", "ordinal", "percent", "ratio", "other"]
        },
        "binary": {
          "type": "boolean",
          "description": "Signifies that the data can only take two values"
        },
        "interval": {
          "type": "string",
          "description": "Describes numeric variables as either continuously valued, or discrete valued",
          "enum": ["continuous", "discrete"]
        },
        "time": {
          "type": "string",
          "description": "Signifies that the variable describes points in time",
          "enum": ["no", "yes", "unknown"]
        },
        "invalidCount": {
          "type": "integer",
          "description": "Counts the number of invalid observations, including missing values, nulls, NA's and any observation with a value enumerated in invalidSpecialCodes"
        },
        "invalidSpecialCodes": {
          "type": "array",
          "items": {
            "type": "number"
          },
          "description": "Any numbers that represent invalid observations"
        },
        "validCount": {
          "type": "integer",
          "description": "Counts the number of valid observations"
        },
        "uniqueCount": {
          "type": "integer",
          "description": "Count of unique values, including invalid signifiers"
        },
        "median": {
          "type": ["number", "string"],
          "description": "A central value in the distribution such that there are as many values equal or above, as there are equal or below this value.",
          "oneOf": [
            {
              "type": "number"
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        },
        "mean": {
          "type": ["number", "string"],
          "description": "Average of all numeric values, which are not contained in invalidSpecialCodes",
          "oneOf": [
            {
              "type": "number"
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        },
        "max": {
          "type": ["number", "string"],
          "description": "Largest numeric value observed in dataset, that is not contained in invalidSpecialCodes",
          "oneOf": [
            {
              "type": "number"
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        },
        "min": {
          "type": ["number", "string"],
          "description": "Least numeric value observed in dataset, that is not contained in invalidSpecialCodes",
          "oneOf": [
            {
              "type": "number"
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        },
        "mode": {
          "type": ["array", "string"],
          "description": "Value that occurs most frequently. Multiple values in the case of ties.",
          "oneOf": [
            {
              "type": "array",
              "items": {
                "type": "number"
              }
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        },
        "modeFreq": {
          "type": ["integer", "string"],
          "description": "Number of times value of mode is observed in variable",
          "oneOf": [
            {
              "type": "integer"
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        },
        "fewestValues": {
          "type": ["array", "string"],
          "description": "Value that occurs least frequently. Multiple values in the case of ties.",
          "oneOf": [
            {
              "type": "array",
              "items": {
                "type": "number"
              }
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        },
        "fewestFreq": {
          "type": ["integer", "string"],
          "description": "Number of times value of fewestValues is observed in variable",
          "oneOf": [
            {
              "type": "integer"
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        },
        "midpoint": {
          "type": ["number", "string"],
          "description": "The value equidistant from the reported min and max values",
          "oneOf": [
            {
              "type": "number"
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        },
        "midpointFreq": {
          "type": ["integer", "string"],
          "description": "Number of observations with value equal to midpoint",
          "oneOf": [
            {
              "type": "integer"
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        },
        "stdDev": {
          "type": ["number", "string"],
          "description": "Standard deviation of the values, measuring the spread between values, specifically using population formula",
          "oneOf": [
            {
              "type": "number"
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        },
        "herfindahlIndex": {
          "type": ["number", "string"],
          "description": "Measure of heterogeneity of a categorical variable which gives the probability that any two randomly sampled observations have the same value",
          "oneOf": [
            {
              "type": "number"
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        },
        "plotValues": {
          "type": ["object", "string"],
          "description": "Plot points of a bar chart for tracing distribution of variable",
          "oneOf": [
            {
              "type": "object"
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        },
        "plotType": {
          "type": "string",
          "description": "Describes default type of plot appropriate to represent distribution of variable",
          "enum": ["bar", "continuous"]
        },
        "plotX": {
          "type": ["array", "string"],
          "description": "Plot points along x dimension for tracing distribution of variable",
          "oneOf": [
            {
              "type": "array",
              "items": {
                "type": "number"
              }
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        },
        "plotY": {
          "type": ["array", "string"],
          "description": "Plot points along y dimension for tracing distribution of variable",
          "oneOf": [
            {
              "type": "array",
              "items": {
                "type": "number"
              }
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        },
        "cdfPlotType": {
          "type": "string",
          "description": "Describes default type of plot appropriate to represent cumulative distribution of variable, or the string \"NA\""
        },
        "cdfPlotX": {
          "type": ["array", "string"],
          "description": "Plot points along x dimension for tracing cumulative distribution of variable",
          "oneOf": [
            {
              "type": "array",
              "items": {
                "type": "number"
              }
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        },
        "cdfPlotY": {
          "type": ["array", "string"],
          "description": "Plot points along y dimension for tracing cumulative distribution of variable",
          "oneOf": [
            {
              "type": "array",
              "items": {
                "type": "number"
              }
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        },
        "interpretation": {
          "type": ["object", "string"],
          "description": "Object containing descriptors to interpret variable",
          "oneOf": [
            {
              "type": "object"
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        },
        "tworavens": {
          "type": ["object", "string"],
          "description": "Object containing metadata specifically used by TwoRavens platform",
          "oneOf": [
            {
              "type": "object"
            },
            {
              "type": "string",
              "enum": ["NA"]
            }
          ]
        }
      },
      "required": [
        "variableName",
        "description",
        "numchar",
        "nature",
        "binary",
        "interval",
        "time",
        "invalidCount",
        "validCount",
        "uniqueCount",
        "median",
        "mean",
        "max",
        "min",
        "mode",
        "modeFreq",
        "fewestValues",
        "fewestFreq",
        "midpoint",
        "midpointFreq",
        "stdDev",
        "herfindahlIndex",
        "plotValues",
        "plotType",
        "plotX",
        "plotY",
        "cdfPlotType",
        "cdfPlotX",
        "cdfPlotY"
      ]
    }
  },
  "additionalProperties": false,
  "minProperties": 1
}