Last active
May 29, 2020 00:39
-
-
Save richpsharp/27f062594ef20396962edf4c9ad4f73e to your computer and use it in GitHub Desktop.
Script to evalulate Justin lasso stats values
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Demo of how to use pandas to multiply one table by another.""" | |
import argparse | |
import os | |
from osgeo import gdal | |
from osgeo import ogr | |
from osgeo import osr | |
import pandas | |
parser = argparse.ArgumentParser(description='mult by columns script') | |
parser.add_argument( | |
'--point_cloud', action='store_true', | |
help='if present do the justin positional index thing') | |
parser.add_argument( | |
'--base_table_path', type=str, required=True) | |
parser.add_argument( | |
'--lasso_table_path', type=str, required=True) | |
args = parser.parse_args() | |
# Justin said this was his reference | |
esa_lulc_geotransform = (-180.0, 30*0.0027777778, 0.0, 90.0, 0.0, 30*-0.0027777778) | |
esa_n_cols = 129600 // 30 | |
inv_gt = gdal.InvGeoTransform(esa_lulc_geotransform) | |
base_table_path = args.base_table_path | |
lasso_table_path = args.lasso_table_path | |
print('reading') | |
base_table_df = pandas.read_csv(base_table_path) | |
lasso_df = pandas.read_csv(lasso_table_path) | |
target_df = pandas.DataFrame() | |
if args.point_cloud: | |
gpkg_driver = ogr.GetDriverByName('GPKG') | |
base_id = os.path.basename(os.path.splitext(base_table_path)[0]) | |
vector = gpkg_driver.CreateDataSource(f'{base_id}.gpkg') | |
wgs84_srs = osr.SpatialReference() | |
wgs84_srs.ImportFromEPSG(4326) | |
layer = vector.CreateLayer(base_id, wgs84_srs, geom_type=ogr.wkbPoint) | |
layer_def = layer.GetLayerDefn() | |
header_pos = {} | |
print('start lasso') | |
for row_index, row in lasso_df.iterrows(): | |
header = row[0] | |
header_pos[header] = row_index | |
if args.point_cloud: | |
field = ogr.FieldDefn(header, ogr.OFTReal) | |
layer.CreateField(field) | |
lasso_val = row[1] | |
if header in base_table_df: | |
# modify that column | |
target_df[header] = base_table_df[header]*lasso_val | |
if '*' in header: | |
# add a new column | |
col_a, col_b = header.split('*') | |
target_df[header] = ( | |
base_table_df[col_a] * base_table_df[col_b] * row[1]) | |
if '^' in header: | |
col, power = header.split('^') | |
target_df[header] = base_table_df[col]**int(power)*lasso_val | |
# Add a "predicted" column | |
predicted_col = [] | |
for _, row in target_df.iterrows(): | |
# the "2:" don't include the coordates that are the first two columns | |
predicted_col.append(sum(row[2:])) | |
target_df['predicted'] = predicted_col | |
header_pos['predicted'] = len(header_pos) | |
if args.point_cloud: | |
field = ogr.FieldDefn('predicted', ogr.OFTReal) | |
layer.CreateField(field) | |
layer.StartTransaction() | |
print('generate point cloud') | |
for row_index, row in base_table_df.iterrows(): | |
positional_index = row['positional_index'] | |
pos_x = positional_index % esa_n_cols | |
pos_y = positional_index // esa_n_cols | |
lng, lat = gdal.ApplyGeoTransform(esa_lulc_geotransform, pos_x, pos_y) | |
point_geom = ogr.Geometry(ogr.wkbPoint) | |
point_geom.AddPoint(lng, lat) | |
point_feature = ogr.Feature(layer_def) | |
point_feature.SetGeometry(point_geom) | |
for header, header_col in header_pos.items(): | |
point_feature.SetField( | |
header, target_df.iloc[row_index, header_col]) | |
layer.CreateFeature(point_feature) | |
layer.CommitTransaction() | |
layer = None | |
vector = None | |
print('writing') | |
target_df.to_csv('result.csv', index=False) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment