-
-
Save HTenkanen/3b214be899f0d3885bad48577de48150 to your computer and use it in GitHub Desktop.
import pandas as pd | |
import geopandas as gpd | |
from sqlalchemy import create_engine | |
from geoalchemy2 import Geometry | |
from shapely.geometry import MultiLineString, MultiPoint, MultiPolygon | |
from shapely.wkb import dumps | |
import io | |
from pyproj import CRS | |
import csv | |
import time | |
import pygeos | |
def timeit(method): | |
def timed(*args, **kw): | |
ts = time.time() | |
result = method(*args, **kw) | |
te = time.time() | |
if 'log_time' in kw: | |
name = kw.get('log_name', method.__name__.upper()) | |
kw['log_time'][name] = round((te - ts) / 60, 2) | |
else: | |
print('%r %.2f seconds' % \ | |
(method.__name__, (te - ts) )) | |
return result | |
return timed | |
@timeit | |
def get_geometry_type(gdf): | |
"""Get basic geometry type of a GeoDataFrame, and information if the gdf contains Geometry Collections.""" | |
geom_types = list(gdf.geometry.geom_type.unique()) | |
geom_collection = False | |
# Get the basic geometry type | |
basic_types = [] | |
for gt in geom_types: | |
if 'Multi' in gt: | |
geom_collection = True | |
basic_types.append(gt.replace('Multi', '')) | |
else: | |
basic_types.append(gt) | |
geom_types = list(set(basic_types)) | |
# Check for mixed geometry types | |
assert len(geom_types) < 2, "GeoDataFrame contains mixed geometry types, cannot proceed with mixed geometries." | |
geom_type = geom_types[0] | |
return (geom_type, geom_collection) | |
@timeit | |
def get_srid_from_crs(gdf): | |
""" | |
Get EPSG code from CRS if available. If not, return -1. | |
""" | |
if gdf.crs is not None: | |
try: | |
if isinstance(gdf.crs, dict): | |
# If CRS is in typical geopandas format take only the value to avoid pyproj Future warning | |
if 'init' in gdf.crs.keys(): | |
srid = CRS(gdf.crs['init']).to_epsg(min_confidence=25) | |
else: | |
srid = CRS(gdf.crs).to_epsg(min_confidence=25) | |
else: | |
srid = CRS(gdf).to_epsg(min_confidence=25) | |
if srid is None: | |
srid = -1 | |
except: | |
srid = -1 | |
if srid == -1: | |
print("Warning: Could not parse coordinate reference system from GeoDataFrame. Inserting data without defined CRS.") | |
return srid | |
@timeit | |
def convert_to_wkb(gdf, geom_name): | |
# Convert geometries to wkb | |
# With pygeos | |
gdf[geom_name] = pygeos.to_wkb(pygeos.from_shapely(gdf[geom_name].to_list()), hex=True) | |
# With Shapely | |
# gdf[geom_name] = gdf[geom_name].apply(lambda x: dumps(x, hex=True)) | |
return gdf | |
@timeit | |
def write_to_db(gdf, engine, index, tbl, table, schema, srid, geom_name): | |
# Convert columns to lists and make a generator | |
args = [list(gdf[i]) for i in gdf.columns] | |
if index: | |
args.insert(0,list(gdf.index)) | |
data_iter = zip(*args) | |
# get list of columns using pandas | |
keys = tbl.insert_data()[0] | |
columns = ', '.join('"{}"'.format(k) for k in list(keys)) | |
# borrowed from https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#insertion-method | |
s_buf = io.StringIO() | |
writer = csv.writer(s_buf) | |
writer.writerows(data_iter) | |
s_buf.seek(0) | |
conn = engine.raw_connection() | |
cur = conn.cursor() | |
sql = 'COPY {} ({}) FROM STDIN WITH CSV'.format( | |
table, columns) | |
try: | |
cur.copy_expert(sql=sql, file=s_buf) | |
cur.execute("SELECT UpdateGeometrySRID('{table}', '{geometry}', {srid})".format( | |
schema=schema, table=table, geometry=geom_name, srid=srid)) | |
conn.commit() | |
except Exception as e: | |
conn.connection.rollback() | |
conn.close() | |
raise(e) | |
conn.close() | |
@timeit | |
def copy_to_postgis(gdf, engine, table, if_exists='fail', | |
schema=None, dtype=None, index=False, | |
): | |
""" | |
Fast upload of GeoDataFrame into PostGIS database using COPY. | |
Parameters | |
---------- | |
gdf : GeoDataFrame | |
GeoDataFrame containing the data for upload. | |
engine : SQLAclchemy engine. | |
Connection. | |
if_exists : str | |
What to do if table exists already: 'replace' | 'append' | 'fail'. | |
schema : db-schema | |
Database schema where the data will be uploaded (optional). | |
dtype : dict of column name to SQL type, default None | |
Optional specifying the datatype for columns. The SQL type should be a | |
SQLAlchemy type, or a string for sqlite3 fallback connection. | |
index : bool | |
Store DataFrame index to the database as well. | |
""" | |
gdf = gdf.copy() | |
geom_name = gdf.geometry.name | |
if schema is not None: | |
schema_name = schema | |
else: | |
schema_name = 'public' | |
# Get srid | |
srid = get_srid_from_crs(gdf) | |
# Check geometry types | |
geometry_type, contains_multi_geoms = get_geometry_type(gdf) | |
# Build target geometry type | |
if contains_multi_geoms: | |
target_geom_type = "Multi{geom_type}".format(geom_type=geometry_type) | |
else: | |
target_geom_type = geometry_type | |
# Build dtype with Geometry (srid is updated afterwards) | |
if dtype is not None: | |
dtype[geom_name] = Geometry(geometry_type=target_geom_type) | |
else: | |
dtype = {geom_name: Geometry(geometry_type=target_geom_type)} | |
# Get Pandas SQLTable object (ignore 'geometry') | |
# If dtypes is used, update table schema accordingly. | |
pandas_sql = pd.io.sql.SQLDatabase(engine) | |
tbl = pd.io.sql.SQLTable(name=table, pandas_sql_engine=pandas_sql, | |
frame=gdf, dtype=dtype, index=index) | |
# Check if table exists | |
if tbl.exists(): | |
# If it exists, check if should overwrite | |
if if_exists == 'replace': | |
pandas_sql.drop_table(table) | |
tbl.create() | |
elif if_exists == 'fail': | |
raise Exception("Table '{table}' exists in the database.".format(table=table)) | |
elif if_exists == 'append': | |
pass | |
else: | |
tbl.create() | |
# Ensure all geometries all Geometry collections if there were MultiGeometries in the table | |
if contains_multi_geoms: | |
mask = gdf[geom_name].geom_type==geometry_type | |
if geometry_type == 'Point': | |
gdf.loc[mask, geom_name] = gdf.loc[mask, geom_name].apply(lambda geom: MultiPoint([geom])) | |
elif geometry_type == 'LineString': | |
gdf.loc[mask, geom_name] = gdf.loc[mask, geom_name].apply(lambda geom: MultiLineString([geom])) | |
elif geometry_type == 'Polygon': | |
gdf.loc[mask, geom_name] = gdf.loc[mask, geom_name].apply(lambda geom: MultiPolygon([geom])) | |
# Convert geometries to WKB | |
gdf = convert_to_wkb(gdf, geom_name) | |
# Write to database | |
write_to_db(gdf, engine, index, tbl, table, schema, srid, geom_name) | |
return | |
# ===================== | |
# TEST | |
# ===================== | |
data = gpd.read_file("https://gist.githubusercontent.com/HTenkanen/456ec4611a943955823a65729c9cf2aa/raw/be56f5e1e5c06c33cd51e89f823a7d770d8769b5/ykr_basegrid.geojson") | |
engine = create_engine("postgresql+psycopg2://myuser:mypwd@localhost:5432/mydb") | |
# Run with %timeit to get a proper time-profile | |
copy_to_postgis(data, engine, table='ykr_test', if_exists='replace', schema=None, dtype=None, index=True) |
Dear @HTenkanen, thank you for your answer.
I follow your steps but when I install your fork with:
pip install git+https://github.com/HTenkanen/geopandas.git
I upgraded my version of geopandas to 0.6 again:
gpd.__version__
'0.6.1+103.g8fc5d8d'
With your fork I have the following error:
ProgrammingError: (psycopg2.errors.UndefinedObject) «geometry» does not exist LINE 28: geometry geometry(MULTIPOLYGON,-1)
Full error here:
`data.to_postgis(con = engine, name = table, if_exists='replace')
UndefinedObject Traceback (most recent call last)
~.conda\envs\qgis_env4\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1245 self.dialect.do_execute(
-> 1246 cursor, statement, parameters, context
1247 )
~.conda\envs\qgis_env4\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
587 def do_execute(self, cursor, statement, parameters, context=None):
--> 588 cursor.execute(statement, parameters)
589
UndefinedObject: no existe el tipo «geometry»
LINE 28: geometry geometry(MULTIPOLYGON,-1)
^
The above exception was the direct cause of the following exception:
ProgrammingError Traceback (most recent call last)
in
----> 1 data.to_postgis(con = engine, name = table, if_exists='replace')
~.conda\envs\qgis_env4\lib\site-packages\geopandas\geodataframe.py in to_postgis(self, name, con, schema, if_exists, index, index_label, chunksize, dtype)
812 """
813 geopandas.io.sql.write_postgis(
--> 814 self, name, con, schema, if_exists, index, index_label, chunksize, dtype,
815 )
816
~.conda\envs\qgis_env4\lib\site-packages\geopandas\io\sql.py in write_postgis(gdf, name, con, schema, if_exists, index, index_label, chunksize, dtype)
359 )
360 else:
--> 361 tbl.create()
362
363 # Convert LinearRing geometries to LineString
~.conda\envs\qgis_env4\lib\site-packages\pandas\io\sql.py in create(self)
653 raise ValueError(f"'{self.if_exists}' is not valid for if_exists")
654 else:
--> 655 self._execute_create()
656
657 def _execute_insert(self, conn, keys, data_iter):
~.conda\envs\qgis_env4\lib\site-packages\pandas\io\sql.py in _execute_create(self)
639 # Inserting table into database, add to MetaData object
640 self.table = self.table.tometadata(self.pd_sql.meta)
--> 641 self.table.create()
642
643 def create(self):
~.conda\envs\qgis_env4\lib\site-packages\sqlalchemy\sql\schema.py in create(self, bind, checkfirst)
868 if bind is None:
869 bind = _bind_or_error(self)
--> 870 bind._run_visitor(ddl.SchemaGenerator, self, checkfirst=checkfirst)
871
872 def drop(self, bind=None, checkfirst=False):
~.conda\envs\qgis_env4\lib\site-packages\sqlalchemy\engine\base.py in _run_visitor(self, visitorcallable, element, connection, **kwargs)
2047 ):
2048 with self._optional_conn_ctx_manager(connection) as conn:
-> 2049 conn._run_visitor(visitorcallable, element, **kwargs)
2050
2051 class _trans_ctx(object):
~.conda\envs\qgis_env4\lib\site-packages\sqlalchemy\engine\base.py in _run_visitor(self, visitorcallable, element, **kwargs)
1616
1617 def _run_visitor(self, visitorcallable, element, **kwargs):
-> 1618 visitorcallable(self.dialect, self, **kwargs).traverse_single(element)
1619
1620
~.conda\envs\qgis_env4\lib\site-packages\sqlalchemy\sql\visitors.py in traverse_single(self, obj, **kw)
136 meth = getattr(v, "visit_%s" % obj.visit_name, None)
137 if meth:
--> 138 return meth(obj, **kw)
139
140 def iterate(self, obj):
~.conda\envs\qgis_env4\lib\site-packages\sqlalchemy\sql\ddl.py in visit_table(self, table, create_ok, include_foreign_key_constraints, _is_metadata_operation)
824 table,
825 include_foreign_key_constraints= # noqa
--> 826 include_foreign_key_constraints, # noqa
827 )
828 # fmt: on
~.conda\envs\qgis_env4\lib\site-packages\sqlalchemy\engine\base.py in execute(self, object_, *multiparams, **params)
980 raise exc.ObjectNotExecutableError(object_)
981 else:
--> 982 return meth(self, multiparams, params)
983
984 def _execute_function(self, func, multiparams, params):
~.conda\envs\qgis_env4\lib\site-packages\sqlalchemy\sql\ddl.py in _execute_on_connection(self, connection, multiparams, params)
70
71 def _execute_on_connection(self, connection, multiparams, params):
---> 72 return connection._execute_ddl(self, multiparams, params)
73
74 def execute(self, bind=None, target=None):
~.conda\envs\qgis_env4\lib\site-packages\sqlalchemy\engine\base.py in _execute_ddl(self, ddl, multiparams, params)
1042 compiled,
1043 None,
-> 1044 compiled,
1045 )
1046 if self._has_events or self.engine._has_events:
~.conda\envs\qgis_env4\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1248 except BaseException as e:
1249 self._handle_dbapi_exception(
-> 1250 e, statement, parameters, cursor, context
1251 )
1252
~.conda\envs\qgis_env4\lib\site-packages\sqlalchemy\engine\base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1474 util.raise_from_cause(newraise, exc_info)
1475 elif should_wrap:
-> 1476 util.raise_from_cause(sqlalchemy_exception, exc_info)
1477 else:
1478 util.reraise(*exc_info)
~.conda\envs\qgis_env4\lib\site-packages\sqlalchemy\util\compat.py in raise_from_cause(exception, exc_info)
396 exc_type, exc_value, exc_tb = exc_info
397 cause = exc_value if exc_value is not exception else None
--> 398 reraise(type(exception), exception, tb=exc_tb, cause=cause)
399
400
~.conda\envs\qgis_env4\lib\site-packages\sqlalchemy\util\compat.py in reraise(tp, value, tb, cause)
150 value.cause = cause
151 if value.traceback is not tb:
--> 152 raise value.with_traceback(tb)
153 raise value
154
~.conda\envs\qgis_env4\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1244 if not evt_handled:
1245 self.dialect.do_execute(
-> 1246 cursor, statement, parameters, context
1247 )
1248 except BaseException as e:
~.conda\envs\qgis_env4\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
586
587 def do_execute(self, cursor, statement, parameters, context=None):
--> 588 cursor.execute(statement, parameters)
589
590 def do_execute_no_params(self, cursor, statement, context=None):
ProgrammingError: (psycopg2.errors.UndefinedObject) no existe el tipo «geometry»
LINE 28: geometry geometry(MULTIPOLYGON,-1)
^
[SQL:
CREATE TABLE public."table" (
"Shape_Length" FLOAT(53),
"Shape_Area" FLOAT(53),
"INDICE_XY" TEXT,,
"Z_VEGE" TEXT,
"FCC" TEXT,
"CIRCUITO" TEXT,
geometry geometry(MULTIPOLYGON,-1)
)
]
(Background on this error at: http://sqlalche.me/e/f405)`
I think this is not the right fork, I am trying to find the right one with the 0.7 version.
If I use the 0.7 version without fork I get the following error:
gpd.__version__
'0.7.0'
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-6-5fe643cac22d> in <module>
----> 1 data.to_postgis(con = engine, name = table, if_exists='replace')
~\.conda\envs\qgis_env4\lib\site-packages\pandas\core\generic.py in __getattr__(self, name)
5272 if self._info_axis._can_hold_identifiers_and_holds_name(name):
5273 return self[name]
-> 5274 return object.__getattribute__(self, name)
5275
5276 def __setattr__(self, name: str, value) -> None:
AttributeError: 'GeoDataFrame' object has no attribute 'to_postgis'
@CibelesR. You need to use Geopandas 0.7.0 for this to work. to_postgis()
method won't work with Geopandas 0.6.0 version. I am pretty sure that your issues now relate to installation problems / something in your code, not to the package itself because they pass all our tests.
You could try to download the fork to your own laptop locally and then inside the downloaded folder run:
$ python setup.py install
Before doing that ensure that you have the geopandas 0.7.0 installed locally.
@HTenkanen. Thanks for sharing this gist, this is very useful!
I have ran this and the copy_to_postgis() does work, however there are several problems.
- The schema ='myschema' argument is ignored* and the file goes straight into the public schema.
- Warning message - Could not parse coordinate reference system from GeoDataFrame. Inserting data without defined CRS. Which does as expected e.g. does not pass the CRS into _postgreSQL db.
- If I pass in a Polygon object (not a MultiPolygon) I get the following error psycopg2.errors.InvalidParameterValue: Geometry type (Polygon) does not match column type (MultiPolygon) - See gist.
I am using; postgreSQL (Ubuntu 12.2-1.pgdg18.04+1) and POSTGIS="3.0.0 r17983" [EXTENSION] PGSQL="120" GEOS="3.7.1-CAPI-1.11.1 27a5e771" PROJ="Rel. 4.9.3, 15 August 2016" LIBXML="2.9.4" LIBJSON="0.12.1" LIBPROTOBUF="1.2.1" WAGYU="0.4.3 (Internal)"
I have also recreated your conda env, which produced the same results as what I got when I ran this on my local conda env (with geopandas==0.7). I did have to pip uninstall geopandas after running python setup.py install
on your fork and then reinstall geopandas via conda-forge, since geopandas==0.6.1 is setup on your fork. I also had to pip install sqlalchemy, geoalchemy2, pygeos and psycopg2.
This gist shows three reproducible example GeoDataFrames I have ran this function on; https://gist.github.com/tastatham/d436aa99b8d98263a3856fca75c69bfd#file-gpd_to_postgis_example-py
@HTenkanen you are right, I am doing something wrong when I install the library from the fork but I am not able to understand what.
When I use https://github.com/HTenkanen/geopandas directly, my version changes from geopandas from 0.7 to 0.6. I copy the console down here:
Successfully built geopandas
Installing collected packages: geopandas
Attempting uninstall: geopandas
Found existing installation: geopandas 0.7.0
Uninstalling geopandas-0.7.0:
Successfully uninstalled geopandas-0.7.0
Successfully installed geopandas-0.6.1+103.g8fc5d8d
For local installation, I download de .zip about the repo in the link you provide. I unzip the folder and with cd get my position there. Then, I run:
$ python setup.py install
When I install it from my downloads folder I get:
UPDATING build\lib\geopandas/_version.py
set build\lib\geopandas/_version.py to '0+unknown'
creating 'dist\geopandas-0+unknown-py3.7.egg' and adding 'build\bdist.win-amd64\egg' to it
removing 'build\bdist.win-amd64\egg' (and everything under it)
Processing geopandas-0+unknown-py3.7.egg
removing 'c:\users\credondo\.conda\envs\qgis_env4\lib\site-packages\geopandas-0+unknown-py3.7.egg' (and everything under it)
creating c:\users\credondo\.conda\envs\qgis_env4\lib\site-packages\geopandas-0+unknown-py3.7.egg
Extracting geopandas-0+unknown-py3.7.egg to c:\users\credondo\.conda\envs\qgis_env4\lib\site-packages
geopandas 0+unknown is already the active version in easy-install.pth
Installed c:\users\credondo\.conda\envs\qgis_env4\lib\site-packages\geopandas-0+unknown-py3.7.egg
I hope it can show some light in what part of the process I am doing wrong through the installation. Thank you so much for your interest and your time.
@tastatham @CibelesR: Thanks for your comments.
Indeed there was an issue writing a table into new schema when the table already existed. I now fixed that and pushed the changes to my fork. There was indeed also a bit of confusion with version naming as my fork did not have a tag for 0.7.0.
Now everything should be up-to-date with geopandas master, so you can directly install the Geopandas version 0.7.0 with to_postgis()
method only by executing:
$ pip install git+https://github.com/HTenkanen/geopandas.git
Hopefully this now solves these issues.
After that following commands should work:
import geopandas as gpd
from sqlalchemy import create_engine
data = gpd.read_file("https://gist.githubusercontent.com/HTenkanen/456ec4611a943955823a65729c9cf2aa/raw/be56f5e1e5c06c33cd51e89f823a7d770d8769b5/ykr_basegrid.geojson")
engine = create_engine("postgresql+psycopg2://myuser:mypwd@localhost:5432/mydb")
data.to_postgis(con=engine, name='test_table', if_exists='replace')
For future reference: Do not use this gist. It was a proof-of-concept that was created before making a PR for adding to_postgis()
-method into geopandas. This functionality should be coming out in the next release of GeoPandas.
If you cannot wait for that, you can test out the functionality by installing geopandas with pip using command (should work in Unix systems, not guaranteed for Windows):
$ pip install git+https://github.com/HTenkanen/geopandas.git
Hi @HTenkanen & @tastatham! Thank you for your answers.
I try it again. I must say I am trying in a Windows 10 x64 with conda environment. It is not working, as you said, it could be because of Windows (yes, I know... I will try in Unix system tonight).
The error I get seems to be relational with the geometry field. I try with multipolygon, polygon and linestring layers from .gdb, .shp and .dxf format files. I tried with and without geometry, and with and without crs. I always get the same error:
ProgrammingError: (psycopg2.errors.UndefinedObject) type «geometry» does not exists
LINE 28: geometry geometry(MULTIPOLYGON,-1)
^
I let you my code here so maybe I could be useful for Windows future adaptation:
import os
import pandas as pd
import geopandas as gpd
from sqlalchemy import create_engine
import psycopg2
gpd.__version__
'0.7.0+2.g7be2578'
dirc = 'C:/Users/credondo/.../A'
GDB = 'grid.gdb'
os.chdir(dirc)
data = gpd.read_file(GDB, layer=0)
crs = {'init': 'epsg:25830'}
data = gpd.GeoDataFrame(data, crs = crs, geometry = data['geometry'])
engine = create_engine('postgresql+psycopg2://postgres:postgres@localhost:5432/postgres')
table = 'table'
data.to_postgis(con = engine, name = table, if_exists='replace')
And the error I get:
---------------------------------------------------------------------------
UndefinedObject Traceback (most recent call last)
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1245 self.dialect.do_execute(
-> 1246 cursor, statement, parameters, context
1247 )
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
587 def do_execute(self, cursor, statement, parameters, context=None):
--> 588 cursor.execute(statement, parameters)
589
UndefinedObject: does not exits «geometry» type
LINE 28: geometry geometry(MULTIPOLYGON,-1)
^
The above exception was the direct cause of the following exception:
ProgrammingError Traceback (most recent call last)
<ipython-input-55-5fe643cac22d> in <module>
----> 1 data.to_postgis(con = engine, name = table, if_exists='replace')
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\geopandas\geodataframe.py in to_postgis(self, name, con, schema, if_exists, index, index_label, chunksize, dtype)
812 """
813 geopandas.io.sql.write_postgis(
--> 814 self, name, con, schema, if_exists, index, index_label, chunksize, dtype,
815 )
816
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\geopandas\io\sql.py in write_postgis(gdf, name, con, schema, if_exists, index, index_label, chunksize, dtype)
359 )
360 else:
--> 361 tbl.create()
362
363 # Convert LinearRing geometries to LineString
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\pandas\io\sql.py in create(self)
653 raise ValueError(f"'{self.if_exists}' is not valid for if_exists")
654 else:
--> 655 self._execute_create()
656
657 def _execute_insert(self, conn, keys, data_iter):
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\pandas\io\sql.py in _execute_create(self)
639 # Inserting table into database, add to MetaData object
640 self.table = self.table.tometadata(self.pd_sql.meta)
--> 641 self.table.create()
642
643 def create(self):
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\sql\schema.py in create(self, bind, checkfirst)
868 if bind is None:
869 bind = _bind_or_error(self)
--> 870 bind._run_visitor(ddl.SchemaGenerator, self, checkfirst=checkfirst)
871
872 def drop(self, bind=None, checkfirst=False):
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\base.py in _run_visitor(self, visitorcallable, element, connection, **kwargs)
2047 ):
2048 with self._optional_conn_ctx_manager(connection) as conn:
-> 2049 conn._run_visitor(visitorcallable, element, **kwargs)
2050
2051 class _trans_ctx(object):
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\base.py in _run_visitor(self, visitorcallable, element, **kwargs)
1616
1617 def _run_visitor(self, visitorcallable, element, **kwargs):
-> 1618 visitorcallable(self.dialect, self, **kwargs).traverse_single(element)
1619
1620
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\sql\visitors.py in traverse_single(self, obj, **kw)
136 meth = getattr(v, "visit_%s" % obj.__visit_name__, None)
137 if meth:
--> 138 return meth(obj, **kw)
139
140 def iterate(self, obj):
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\sql\ddl.py in visit_table(self, table, create_ok, include_foreign_key_constraints, _is_metadata_operation)
824 table,
825 include_foreign_key_constraints= # noqa
--> 826 include_foreign_key_constraints, # noqa
827 )
828 # fmt: on
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\base.py in execute(self, object_, *multiparams, **params)
980 raise exc.ObjectNotExecutableError(object_)
981 else:
--> 982 return meth(self, multiparams, params)
983
984 def _execute_function(self, func, multiparams, params):
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\sql\ddl.py in _execute_on_connection(self, connection, multiparams, params)
70
71 def _execute_on_connection(self, connection, multiparams, params):
---> 72 return connection._execute_ddl(self, multiparams, params)
73
74 def execute(self, bind=None, target=None):
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\base.py in _execute_ddl(self, ddl, multiparams, params)
1042 compiled,
1043 None,
-> 1044 compiled,
1045 )
1046 if self._has_events or self.engine._has_events:
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1248 except BaseException as e:
1249 self._handle_dbapi_exception(
-> 1250 e, statement, parameters, cursor, context
1251 )
1252
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1474 util.raise_from_cause(newraise, exc_info)
1475 elif should_wrap:
-> 1476 util.raise_from_cause(sqlalchemy_exception, exc_info)
1477 else:
1478 util.reraise(*exc_info)
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\util\compat.py in raise_from_cause(exception, exc_info)
396 exc_type, exc_value, exc_tb = exc_info
397 cause = exc_value if exc_value is not exception else None
--> 398 reraise(type(exception), exception, tb=exc_tb, cause=cause)
399
400
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\util\compat.py in reraise(tp, value, tb, cause)
150 value.__cause__ = cause
151 if value.__traceback__ is not tb:
--> 152 raise value.with_traceback(tb)
153 raise value
154
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1244 if not evt_handled:
1245 self.dialect.do_execute(
-> 1246 cursor, statement, parameters, context
1247 )
1248 except BaseException as e:
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
586
587 def do_execute(self, cursor, statement, parameters, context=None):
--> 588 cursor.execute(statement, parameters)
589
590 def do_execute_no_params(self, cursor, statement, context=None):
ProgrammingError: (psycopg2.errors.UndefinedObject) type «geometry» does not exists
LINE 28: geometry geometry(MULTIPOLYGON,-1)
^
[SQL:
CREATE TABLE public."table" (
"Shape_Length" FLOAT(53),
"Shape_Area" FLOAT(53),
"INDICE_XY" TEXT,
"VANO_FISICO" TEXT,
"CAMPO_1" TEXT,
"CAMPO_2" TEXT,
"CAMPO_3" TEXT,
"CAMPO_4" TEXT,
"H_VEGE" TEXT,
"Z_VEGE" TEXT,
"FCC" TEXT,
"H_CONDUC_1" TEXT,
"H_CONDUC_2" TEXT,
"H_CONDUC_3" TEXT,
"H_CONDUC_4" TEXT,
"Z_CONDUC_1" TEXT,
"Z_CONDUC_2" TEXT,
"Z_CONDUC_3" TEXT,
"Z_CONDUC_4" TEXT,
"DH_CONDUC_1" TEXT,
"DH_CONDUC_2" TEXT,
"DH_CONDUC_3" TEXT,
"DH_CONDUC_4" TEXT,
"FECHA" TEXT,
"CIRCUITO" TEXT,
geometry geometry(MULTIPOLYGON,-1)
)
]
(Background on this error at: http://sqlalche.me/e/f405)
@CibelesR The line below is causing your issues. You are trying to do something weird with the geometry
-parameter. The parameter geometry
is used for specifying the column name of the column containing the geometries:
Remove the following line altogether and you should be fine:
data = gpd.GeoDataFrame(data, crs = crs, geometry = data['geometry'])
If you are new to GIS in Python, I could recommend checking this online course we have authored:
https://automating-gis-processes.github.io/site/
@HTenkanen. I know GIS but I am super new at GIS in Python.
I removed the line, but the problem is still the same:
---------------------------------------------------------------------------
UndefinedObject Traceback (most recent call last)
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1245 self.dialect.do_execute(
-> 1246 cursor, statement, parameters, context
1247 )
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
587 def do_execute(self, cursor, statement, parameters, context=None):
--> 588 cursor.execute(statement, parameters)
589
UndefinedObject: does not exist «geometry» type
LINE 28: geometry geometry(MULTIPOLYGON,-1)
^
The above exception was the direct cause of the following exception:
ProgrammingError Traceback (most recent call last)
<ipython-input-6-5fe643cac22d> in <module>
----> 1 data.to_postgis(con = engine, name = table, if_exists='replace')
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\geopandas\geodataframe.py in to_postgis(self, name, con, schema, if_exists, index, index_label, chunksize, dtype)
812 """
813 geopandas.io.sql.write_postgis(
--> 814 self, name, con, schema, if_exists, index, index_label, chunksize, dtype,
815 )
816
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\geopandas\io\sql.py in write_postgis(gdf, name, con, schema, if_exists, index, index_label, chunksize, dtype)
359 )
360 else:
--> 361 tbl.create()
362
363 # Convert LinearRing geometries to LineString
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\pandas\io\sql.py in create(self)
653 raise ValueError(f"'{self.if_exists}' is not valid for if_exists")
654 else:
--> 655 self._execute_create()
656
657 def _execute_insert(self, conn, keys, data_iter):
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\pandas\io\sql.py in _execute_create(self)
639 # Inserting table into database, add to MetaData object
640 self.table = self.table.tometadata(self.pd_sql.meta)
--> 641 self.table.create()
642
643 def create(self):
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\sql\schema.py in create(self, bind, checkfirst)
868 if bind is None:
869 bind = _bind_or_error(self)
--> 870 bind._run_visitor(ddl.SchemaGenerator, self, checkfirst=checkfirst)
871
872 def drop(self, bind=None, checkfirst=False):
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\base.py in _run_visitor(self, visitorcallable, element, connection, **kwargs)
2047 ):
2048 with self._optional_conn_ctx_manager(connection) as conn:
-> 2049 conn._run_visitor(visitorcallable, element, **kwargs)
2050
2051 class _trans_ctx(object):
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\base.py in _run_visitor(self, visitorcallable, element, **kwargs)
1616
1617 def _run_visitor(self, visitorcallable, element, **kwargs):
-> 1618 visitorcallable(self.dialect, self, **kwargs).traverse_single(element)
1619
1620
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\sql\visitors.py in traverse_single(self, obj, **kw)
136 meth = getattr(v, "visit_%s" % obj.__visit_name__, None)
137 if meth:
--> 138 return meth(obj, **kw)
139
140 def iterate(self, obj):
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\sql\ddl.py in visit_table(self, table, create_ok, include_foreign_key_constraints, _is_metadata_operation)
824 table,
825 include_foreign_key_constraints= # noqa
--> 826 include_foreign_key_constraints, # noqa
827 )
828 # fmt: on
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\base.py in execute(self, object_, *multiparams, **params)
980 raise exc.ObjectNotExecutableError(object_)
981 else:
--> 982 return meth(self, multiparams, params)
983
984 def _execute_function(self, func, multiparams, params):
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\sql\ddl.py in _execute_on_connection(self, connection, multiparams, params)
70
71 def _execute_on_connection(self, connection, multiparams, params):
---> 72 return connection._execute_ddl(self, multiparams, params)
73
74 def execute(self, bind=None, target=None):
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\base.py in _execute_ddl(self, ddl, multiparams, params)
1042 compiled,
1043 None,
-> 1044 compiled,
1045 )
1046 if self._has_events or self.engine._has_events:
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1248 except BaseException as e:
1249 self._handle_dbapi_exception(
-> 1250 e, statement, parameters, cursor, context
1251 )
1252
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1474 util.raise_from_cause(newraise, exc_info)
1475 elif should_wrap:
-> 1476 util.raise_from_cause(sqlalchemy_exception, exc_info)
1477 else:
1478 util.reraise(*exc_info)
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\util\compat.py in raise_from_cause(exception, exc_info)
396 exc_type, exc_value, exc_tb = exc_info
397 cause = exc_value if exc_value is not exception else None
--> 398 reraise(type(exception), exception, tb=exc_tb, cause=cause)
399
400
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\util\compat.py in reraise(tp, value, tb, cause)
150 value.__cause__ = cause
151 if value.__traceback__ is not tb:
--> 152 raise value.with_traceback(tb)
153 raise value
154
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1244 if not evt_handled:
1245 self.dialect.do_execute(
-> 1246 cursor, statement, parameters, context
1247 )
1248 except BaseException as e:
C:\ProgramData\Miniconda3\envs\qgis_env5\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
586
587 def do_execute(self, cursor, statement, parameters, context=None):
--> 588 cursor.execute(statement, parameters)
589
590 def do_execute_no_params(self, cursor, statement, context=None):
ProgrammingError: (psycopg2.errors.UndefinedObject) type «geometry» does not exist
LINE 28: geometry geometry(MULTIPOLYGON,-1)
^
[SQL:
CREATE TABLE public."table" (
"Shape_Length" FLOAT(53),
"Shape_Area" FLOAT(53),
"INDICE_XY" TEXT,
"VANO_FISICO" TEXT,
"CAMPO_1" TEXT,
"CAMPO_2" TEXT,
"CAMPO_3" TEXT,
"CAMPO_4" TEXT,
"H_VEGE" TEXT,
"Z_VEGE" TEXT,
"FCC" TEXT,
"H_CONDUC_1" TEXT,
"H_CONDUC_2" TEXT,
"H_CONDUC_3" TEXT,
"H_CONDUC_4" TEXT,
"Z_CONDUC_1" TEXT,
"Z_CONDUC_2" TEXT,
"Z_CONDUC_3" TEXT,
"Z_CONDUC_4" TEXT,
"DH_CONDUC_1" TEXT,
"DH_CONDUC_2" TEXT,
"DH_CONDUC_3" TEXT,
"DH_CONDUC_4" TEXT,
"FECHA" TEXT,
"CIRCUITO" TEXT,
geometry geometry(MULTIPOLYGON,-1)
)
]
(Background on this error at: http://sqlalche.me/e/f405)
@CibelesR. Hmm okay, this is weird. Let's try to debug a bit.
If you run following three lines, does it work? (change the mypwd
, myuser
according your database configuration):
import geopandas as gpd
from sqlalchemy import create_engine
data = gpd.read_file("https://gist.githubusercontent.com/HTenkanen/456ec4611a943955823a65729c9cf2aa/raw/be56f5e1e5c06c33cd51e89f823a7d770d8769b5/ykr_basegrid.geojson")
engine = create_engine("postgresql+psycopg2://myuser:mypwd@localhost:5432/mydb")
data.to_postgis(con=engine, name='test_table', if_exists='replace')
If the above works, then there is something happening with the data that you read from the GeoDataBase.
If it does not work, then --> have you enabled the postgis extension in the database? I.e. have you run the command CREATE EXTENSION postgis;
inside the database you are trying to push the data into? If this has not been done, Postgres does not know how to handle geometries in the database.
@HTenkanen, thanks again. I stumbled across this gist by chance by googling "geopandas to postgis" for a project. I will wait until the new release of Geopandas comes out. I was just curious and wanted to test it out!
Hi @HTenkanen.
I have a problem with my PostGIS configuration. I re-install it and now your code is working as well as mine. But, I discover that 3D geometry is not working with .to_postgis().
2D geometry works perfectly and very fast. But with 3D geometry I get:
InvalidParameterValue: Geometry has Z dimension but column does not
CONTEXT: COPY test, línea 1, columna geometry: «01020000803B000000FCA9F152A7EA2B41E07A148EA68E5141D6A3703D0A47524005819583AEEA2B4105819563A78E5141E2...»
I google a little and I found:
I found in the class WKB the methode write() and in this the line:
com.vividsolutions.jts.io.WKBWriter().write( ( (AbstractDefaultGeometry) geom ).getJTSGeometry() );
The Problem with the WKBWrite is that it supports per default 2D so when you want to use 3D you have say this explicietly .So I changed it to:
com.vividsolutions.jts.io.WKBWriter(geom.getCoordinateDimension()).write( ( (AbstractDefaultGeometry) geom ).getJTSGeometry() );
The secound think is by creating the geometry Tables it creates per default just 2D geometry columns with a srid of -1.It would be nice if you can create a handel that it allow to change between 2D and 3D by creating the schema.
Source: http://osgeo-org.1560.x6.nabble.com/Column-has-Z-dimension-but-geometry-does-not-td5018416.html
Maybe it is related. By the way, the .to_postgis() works fine. Thank you so much!!!
@CibelesR Excellent that you got it working! And thanks for reporting about the issues with 3D geometries 👍 I take a look on it.
@HTenkanen as you can see, I am very new programming, so please, if there is a better way to report this issue with 3D geometry I will be glad to do it in the right way
@CibelesR : Typically you would make an issue in GitHub but as this is still a fork (which disables making issues ) and not yet part of the geopandas core, so informing me like this is good, thanks 🙂
@HTenkanen I forgot: I tried on Unix Sistem (macOS) and Windows 10, work fine for both of them
Dear @HTenkanen, I found something and I think it could be an issue. When I upload a geodataframe with gdf.to_postgis() the information and the geometry do not match. An example:
I have a column with 'order' information. This is a calculated column. If I save to a .shp (gdf.to_file('path*.shp')) and I visualized in a GIS software I have 'order' in the right geometry. But If I do load the layer from Postgres this information has changed.
I found this on Windows 10 x64
The code is:
GDB = 'SVA.gdb'
crs = {'init': 'epsg:25830'}
df_gdb = gpd.GeoDataFrame.from_file(GDB, layer=0, crs=crs, geometry='geometry')
df_gdb['tx'] = df_gdb['XY'].apply(lambda x: x.split('_')[0]).astype('float')
df_gdb['ty'] = df_gdb['XY'].apply(lambda x: x.split('_')[1]).astype('float')
df_gdb.sort_values(by=['tx', 'ty'], inplace = True)
tx_min = df_gdb['tx'].min()
ty_min_for_tx_min = df_gdb['ty'].iloc[0]
df_gdb['dist'] = np.sqrt((df_gdb['tx'] - tx_min)**2 + (df_gdb['ty'] - ty_min_for_tx_min)**2)
df_gdb.sort_values(by=['dist'], inplace = True)
df_gdb['order'] = range(1, 1 + len(df_gdb))
Maybe it is easier to understand with an image. Black is the number I get in the GeoPandasDataframe and blue is the number I get after uploading it.
@CibelesR Sorry for not answering to this before. Now 3D geometries are supported as well. What comes to your comment about mismatching indices, we have witnessed the same behavior and are currently looking into this. For some reason, the records seem to be inserted into the PostGIS table sometimes in a different order than they are in the original GeoDataFrame. All the values match, but it can be a bit confusing for the user as you reported.
@CibelesR . Okay, there is now a lot going on in your codes and you are mixing different libraries together which causes the issues.
With the command
data.postgis.to_postgis()
you are now actually trying to use this library:https://github.com/awburgess/geopandas-postgis
Instead of that you should be using a command:
data.to_postgis()
I would suggest that you 1) remove the
geopandas-to-postgis
library from your Python environment. After this you need to install 2) the latest version of geopandas (0.7.0) and 3) then the modifications from my fork. I.e. you should probably do something like:After these you should be able to write your GeoDataFrame to PostGIS using something like following:
Hopefully this helps.
Notice that the latest version of Geopandas (0.7.0) introduces a new CRS class which is quite a big change:
https://geopandas.org/projections.html#upgrading-to-geopandas-0-7-with-pyproj-2-2-and-proj-6