Last active
December 16, 2015 09:08
-
-
Save martin0258/5410428 to your computer and use it in GitHub Desktop.
Generate T-SQL for applying windowing transformation to time-series data.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# vim:fenc=utf-8 | |
# | |
# Copyright © 2013 martinku <martinku@ss-martin-ku> | |
# | |
# Distributed under terms of the MIT license. | |
""" | |
Purpose: | |
Apply windowing transformation to time-series data. | |
That is, transform time-series data to regression cases used for MS neural network. | |
Input: | |
1. A time series DB table consists of two columns (time and value). | |
For examplae: DB table [ETC].[dbo].[F_SHB_Weekly_Passing] | |
| WeekNum | Passings | | |
| 1 | 12334 | | |
| 2 | 12335 | | |
2. two parameters | |
- n : # of input units | |
- m : # of output units | |
Output: | |
T-SQL that generates the following table. | |
| CurrentWeekNum | I1 | I2 | ... | In | O1 | O2 | ... | Om | | |
| n | 21 | 22 | ... | 67 | 34 | 43 | ... | 65 | | |
| n+1 | 22 | 23 | ... | 68 | 35 | 44 | ... | 66 | | |
""" | |
import argparse | |
from string import Template | |
# settings (adjusted by script users) | |
table_name = "[ETC].[dbo].[F_SHB_Weekly_Passing]" | |
time_column_name = "WeekNum" | |
value_column_name = "Passings" | |
max_time_column = 105 | |
# input parameters (passed in via cmd arguments) | |
parser = argparse.ArgumentParser() | |
parser.add_argument("num_input", help="the number of input", type=int) | |
parser.add_argument("num_output", help="the number of output", type=int) | |
args = parser.parse_args() | |
num_input = args.num_input | |
num_output = args.num_output | |
output_file = "windowing_transformation.sql" | |
# generating sql | |
final_sql = "" | |
window_length = num_input + num_output | |
num_case = max_time_column - window_length + 1 | |
for j in range(1,num_case+1): | |
# build sql for each case | |
pivot_column = [] | |
template = Template("SELECT ${currentTime} AS Current${time_column}") | |
sql = template.substitute( | |
currentTime=str(num_input+j-1), | |
time_column=time_column_name | |
) | |
for i in range(1,num_input+1): | |
pivot_column.append("[%d]" % int(i+j-1)) | |
template = Template("\n,[${In}] AS I${i}") | |
sql += template.substitute( | |
In=str(i+j-1), | |
i=str(i) | |
) | |
for i in range(1,num_output+1): | |
pivot_column.append("[%d]" % int(num_input+i+j-1)) | |
template = Template("\n,[${On}] AS O${i}") | |
sql += template.substitute( | |
On=str(num_input+i+j-1), | |
i=str(i) | |
) | |
template = Template(""" | |
FROM ${table} | |
PIVOT ( | |
SUM(${value_column}) | |
FOR ${time_column} IN (${pivot_column}) | |
) AS t | |
""") | |
sql += template.substitute( | |
table=table_name, | |
value_column=value_column_name, | |
time_column=time_column_name, | |
pivot_column=",".join(pivot_column) | |
) | |
final_sql += sql | |
final_sql += "\nUNION\n" if j<num_case else "" | |
final_sql += "\nORDER BY Current%s" % time_column_name | |
print final_sql | |
output = open(output_file,'w') | |
output.write(final_sql) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment