Created
July 18, 2018 08:22
-
-
Save aqzlpm11/2a2bdf5a3f5c684a502b8dd5f64c0310 to your computer and use it in GitHub Desktop.
Hive import csv demo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyhive import hive# or import hive | |
from pyhdfs import HdfsClient | |
data_csv = '/root/demo/data/train.csv' | |
hdfs = HdfsClient('hadoopmaster.hadoop.test', user_name='admin') | |
hdfs.copy_from_local(data_csv, '/user/admin/data/titanic/train.csv') | |
hive_cursor= hive.connect('hadoopslave1.hadoop.test', username='admin').cursor() | |
sql = r''' | |
CREATE TABLE tmp ( | |
PassengerId INT, | |
Survived INT, | |
Pclass INT, | |
Name STRING, | |
Sex STRING, | |
Age INT, | |
SibSp INT, | |
Parch INT, | |
Ticket STRING, | |
Fare DOUBLE, | |
Cabin STRING, | |
Embarked STRING | |
) | |
row format serde | |
'org.apache.hadoop.hive.serde2.OpenCSVSerde' | |
with | |
SERDEPROPERTIES | |
("separatorChar"=",","quotechar"="\"") | |
STORED AS TEXTFILE | |
''' | |
print(sql) | |
hive_cursor.execute(sql) | |
sql = """ | |
CREATE TABLE `titanic` ( | |
PassengerId INT, | |
Survived INT, | |
Pclass INT, | |
Name STRING, | |
Sex STRING, | |
Age INT, | |
SibSp INT, | |
Parch INT, | |
Ticket STRING, | |
Fare DOUBLE, | |
Cabin STRING, | |
Embarked STRING | |
) | |
STORED AS ORC | |
""" | |
print(sql) | |
hive_cursor.execute(sql) | |
sql = "load data inpath '/user/admin/data/titanic/train.csv' OVERWRITE into table tmp" | |
print(sql) | |
hive_cursor.execute(sql) | |
sql = 'FROM `tmp` INSERT INTO TABLE `titanic` SELECT *' | |
print(sql) | |
hive_cursor.execute(sql) | |
sql = 'DROP TABLE `tmp`' | |
print(sql) | |
hive_cursor.execute(sql) | |
#print cursor.fetchone() | |
#print cursor.fetchall() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment