Skip to content

Instantly share code, notes, and snippets.

@tomasgreif
Created November 29, 2013 15:34
Show Gist options
  • Save tomasgreif/7707386 to your computer and use it in GitHub Desktop.
Save tomasgreif/7707386 to your computer and use it in GitHub Desktop.
standardize_name <- function(string) {
capped <- grep("^[^A-Z]*$", string, perl = TRUE)
substr(string[capped], 1, 1) <- toupper(substr(string[capped], 1, 1))
string <- gsub('_',' ',string, fixed=TRUE)
return(string)
}
GenerateCube <- function(TableName,TableSchema,PrimaryKey,TimeTableName,TimeTableSchema,Destination) {
library(sqldf)
options(sqldf.RPostgreSQL.user = "usr",
sqldf.RPostgreSQL.password = "pwd,
sqldf.RPostgreSQL.dbname = "db",
sqldf.RPostgreSQL.host = "host",
sqldf.RPostgreSQL.port = 5432)
TableDesign <- sqldf(paste0("select column_name, data_type, ordinal_position from information_schema.columns where table_name = '",TableName,"' and table_schema='",TableSchema,"'"),drv='PostgreSQL')
VariablesCharacter <- TableDesign[TableDesign$data_type %in% c('text','character varying','smallint','integer'),1]
VariablesCharacterNames <- standardize_name(VariablesCharacter)
VariablesNumeric <- sort(TableDesign[TableDesign$data_type %in% c('numeric','integer','smallint','bigint'),1])
VariablesDate <- TableDesign[TableDesign$data_type %in% c('date'),1]
VariablesDateNames <- standardize_name(VariablesDate)
Aggregations <- c('sum','count','min','max','avg','distinct-count')
AggregationsNames <- c('Sum','Cnt','Min','Max','Avg','Dcnt')
DimensionsDate <- paste0(
'<Dimension type="TimeDimension" visible="true" foreignKey="',VariablesDate,'" highCardinality="false" name="',VariablesDateNames,'">
<Hierarchy name="Time Hierarchy" visible="true" hasAll="true" primaryKey="time_date">
<Table name="',TimeTableName,'" schema="',TimeTableSchema,'">
</Table>
<Level name="Year" visible="true" column="year_number" ordinalColumn="year_number" type="Integer" internalType="int" uniqueMembers="false" levelType="TimeYears" hideMemberIf="Never">
</Level>
<Level name="Quarter" visible="true" column="quarter_number" ordinalColumn="quarter_number" type="Integer" uniqueMembers="false" levelType="TimeQuarters" hideMemberIf="Never">
</Level>
<Level name="Month" visible="true" column="month_number" ordinalColumn="month_number" type="Integer" uniqueMembers="false" levelType="TimeMonths" hideMemberIf="Never">
</Level>
</Hierarchy>
</Dimension>',collapse='\n')
DimensionsVarchar <-
paste(
'<Dimension type="StandardDimension" visible="true" foreignKey="',PrimaryKey,'" highCardinality="false" name="',VariablesCharacterNames,'">
<Hierarchy name="',VariablesCharacterNames,'" visible="true" hasAll="true" primaryKey="',PrimaryKey,'">
<Table name="',TableName,'" schema="',TableSchema,'">
</Table>
<Level name="',VariablesCharacterNames,'" visible="true" column="',VariablesCharacter,'" type="String" uniqueMembers="false" levelType="Regular" hideMemberIf="Never">
</Level>
</Hierarchy>
</Dimension>',sep='', collapse='')
MeasurePrimaryKey <- paste('<Measure name="','Count (PK)','" column="',PrimaryKey,'" aggregator="count" visible="true"></Measure>',sep='')
Measures <- character(0)
for (i in seq_along(Aggregations)) {
Measures <- paste0(Measures,
paste0('<Measure name="',AggregationsNames[i],'-',standardize_name(VariablesNumeric),'"
column="',VariablesNumeric,'" aggregator="',Aggregations[i],'" visible="true"></Measure>', collapse='\n'), collapse='\n')
}
CubeHeader <- paste0('<Schema name="',TableName,'"><Cube name="project" visible="true" cache="true" enabled="true">
<Table name="',TableName,'" schema="',TableSchema,'"></Table>')
CubeFooter <- '</Cube></Schema>'
writeLines(paste0(CubeHeader,DimensionsVarchar,DimensionsDate,MeasurePrimaryKey,Measures,CubeFooter, collapse=""),con=Destination)
}
#GenerateCube('big_portfolio','public','id','tmp_time','public','/home/tgr/Applications/Saiku/saiku-server/tomcat/webapps/saiku/WEB-INF/classes/foodmart/test.xml')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment