Created February 21, 2021 17:16
pytest stack trace
docker run test_hudi py.test -s --verbose test_hudi.py
============================= test session starts ==============================
platform linux -- Python 3.7.9, pytest-6.1.1, py-1.10.0, pluggy-0.13.1 -- /usr/bin/python3
cachedir: .pytest_cache
rootdir: /
collecting ... collected 1 item

test_hudi.py::test_hudi Ivy Default Cache set to: /root/.ivy2/cache
The jars for the packages stored in: /root/.ivy2/jars
:: loading settings :: url = jar:file:/usr/local/lib/python3.7/site-packages/pyspark/jars/ivy-2.4.0.jar!/org/apache/ivy/core/settings/ivysettings.xml
org.apache.hudi#hudi-spark-bundle_2.12 added as a dependency
org.apache.spark#spark-avro_2.12 added as a dependency
org.apache.spark#spark-sql_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-58d523e8-14cb-4100-8ee6-04eb7c7954d0;1.0
    confs: [default]
    found org.apache.hudi#hudi-spark-bundle_2.12;0.7.0 in central
    found org.apache.spark#spark-avro_2.12;3.0.0 in central
    found org.spark-project.spark#unused;1.0.0 in central
downloading https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.12/0.7.0/hudi-spark-bundle_2.12-0.7.0.jar ...
    [SUCCESSFUL ] org.apache.hudi#hudi-spark-bundle_2.12;0.7.0!hudi-spark-bundle_2.12.jar (1037ms)
downloading https://repo1.maven.org/maven2/org/apache/spark/spark-avro_2.12/3.0.0/spark-avro_2.12-3.0.0.jar ...
    [SUCCESSFUL ] org.apache.spark#spark-avro_2.12;3.0.0!spark-avro_2.12.jar (23ms)
downloading https://repo1.maven.org/maven2/org/spark-project/spark/unused/1.0.0/unused-1.0.0.jar ...
    [SUCCESSFUL ] org.spark-project.spark#unused;1.0.0!unused.jar (17ms)
:: resolution report :: resolve 2852ms :: artifacts dl 1082ms
    :: modules in use:
    org.apache.hudi#hudi-spark-bundle_2.12;0.7.0 from central in [default]
    org.apache.spark#spark-avro_2.12;3.0.0 from central in [default]
    org.spark-project.spark#unused;1.0.0 from central in [default]
    ---------------------------------------------------------------------
    |                  |            modules            ||   artifacts   |
    |       conf       | number| search|dwnlded|evicted|| number|dwnlded|
    ---------------------------------------------------------------------
    |      default     |   3   |   3   |   3   |   0   ||   3   |   3   |
    ---------------------------------------------------------------------
:: retrieving :: org.apache.spark#spark-submit-parent-58d523e8-14cb-4100-8ee6-04eb7c7954d0
    confs: [default]
    3 artifacts copied, 0 already retrieved (36431kB/61ms)
21/02/21 17:15:07 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
21/02/21 17:15:49 WARN DefaultSource: Loading Base File Only View.
21/02/21 17:15:49 WARN DataSource: All paths were ignored:
  file:/tmp/pytest-of-root/pytest-0/test_hudi0/.hoodie/.aux/.bootstrap
FAILED

=================================== FAILURES ===================================
__________________________________ test_hudi ___________________________________

tmp_path = PosixPath('/tmp/pytest-of-root/pytest-0/test_hudi0')

    def test_hudi(tmp_path):
        SparkContext.getOrCreate(
            conf=SparkConf()
            .setAppName("testing")
            .setMaster("local[6]")
            .set(
                "spark.jars.packages",
                "org.apache.hudi:hudi-spark-bundle_2.12:0.7.0,org.apache.spark:spark-avro_2.12:3.0.0,org.apache.spark:spark-sql_2.12:3.0.0",
            )
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.sql.hive.convertMetastoreParquet", "false")
        )
        spark = SparkSession.builder.getOrCreate()
        hudi_options = {
            "hoodie.table.name": "test",
            "hoodie.datasource.write.recordkey.field": "id",
            "hoodie.datasource.write.keygenerator.class": "org.apache.hudi.keygen.SimpleKeyGenerator",
            "hoodie.datasource.write.partitionpath.field": "year,month,day",
            "hoodie.datasource.write.table.name": "test",
            "hoodie.datasource.write.table.type": "COPY_ON_WRITE",
            "hoodie.datasource.write.operation": "upsert",
            "hoodie.datasource.write.precombine.field": "ts",
        }
        df = spark.createDataFrame(
            [
                Row(id=1, year=2020, month=7, day=5, ts=1),
            ]
        )
        # import findspark
        # findspark.init()
        df.write.format("hudi").options(**hudi_options).mode("append").save(str(tmp_path))
        # read_df = spark.read.format("parquet").load(str(tmp_path) + "/*/*/*")
        # This works
        # print(read_df.collect())
>       read_df = spark.read.format("hudi").load(str(tmp_path) + "/*/*/*")

test_hudi.py:47:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
usr/local/lib/python3.7/site-packages/pyspark/sql/readwriter.py:178: in load
    return self._df(self._jreader.load(path))
usr/local/lib/python3.7/site-packages/py4j/java_gateway.py:1305: in __call__
    answer, self.gateway_client, self.target_id, self.name)
usr/local/lib/python3.7/site-packages/pyspark/sql/utils.py:137: in deco
    raise_from(converted)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

e = AnalysisException('Unable to infer schema for Parquet. It must be specified manually.;', 'org.apache.spark.sql.Analysi...:79)\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\n\tat java.lang.Thread.run(Thread.java:748)\n', None)

>   ???
E   pyspark.sql.utils.AnalysisException: Unable to infer schema for Parquet. It must be specified manually.;

<string>:3: AnalysisException
=============================== warnings summary ===============================
test_hudi.py::test_hudi
  /usr/local/lib/python3.7/site-packages/pyspark/sql/context.py:77: DeprecationWarning: Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.
    DeprecationWarning)

-- Docs: https://docs.pytest.org/en/stable/warnings.html
=========================== short test summary info ============================
FAILED test_hudi.py::test_hudi - pyspark.sql.utils.AnalysisException: Unable ...
======================== 1 failed, 1 warning in 49.08s =========================
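Note for anyone revisiting this trace: the sketch below shows two adjustments that are commonly suggested for this failure mode, but neither is confirmed as the fix for this exact run. The assumptions: SimpleKeyGenerator expects a single partition field, and ComplexKeyGenerator is the generator intended for comma-separated lists such as "year,month,day"; and the Hudi 0.7-era quickstart globs one level per partition directory plus one more for the data files (four stars for a three-level layout), whereas the test above globs only three. The base_path value is a hypothetical stand-in for the pytest tmp_path.

# Hedged sketch, not a verified fix for the trace above.
from pyspark.sql import Row, SparkSession

spark = (
    SparkSession.builder.appName("testing")
    .master("local[6]")
    .config(
        "spark.jars.packages",
        "org.apache.hudi:hudi-spark-bundle_2.12:0.7.0,org.apache.spark:spark-avro_2.12:3.0.0",
    )
    .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    .config("spark.sql.hive.convertMetastoreParquet", "false")
    .getOrCreate()
)

hudi_options = {
    "hoodie.table.name": "test",
    "hoodie.datasource.write.recordkey.field": "id",
    # Assumption: SimpleKeyGenerator handles one partition field only;
    # ComplexKeyGenerator supports comma-separated field lists.
    "hoodie.datasource.write.keygenerator.class": "org.apache.hudi.keygen.ComplexKeyGenerator",
    "hoodie.datasource.write.partitionpath.field": "year,month,day",
    "hoodie.datasource.write.table.name": "test",
    "hoodie.datasource.write.table.type": "COPY_ON_WRITE",
    "hoodie.datasource.write.operation": "upsert",
    "hoodie.datasource.write.precombine.field": "ts",
}

base_path = "/tmp/hudi_test"  # hypothetical stand-in for tmp_path
df = spark.createDataFrame([Row(id=1, year=2020, month=7, day=5, ts=1)])
df.write.format("hudi").options(**hudi_options).mode("append").save(base_path)

# One glob level per partition directory (year/month/day) plus one for
# the parquet files themselves, per the 0.7-era quickstart pattern.
read_df = spark.read.format("hudi").load(base_path + "/*/*/*/*")
print(read_df.collect())

Later Hudi releases document reading the table base path directly (spark.read.format("hudi").load(base_path)) without any glob, which sidesteps the depth question entirely.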