Skip to content

Instantly share code, notes, and snippets.

@aqzlpm11
Created July 18, 2018 08:22
Show Gist options
  • Save aqzlpm11/2a2bdf5a3f5c684a502b8dd5f64c0310 to your computer and use it in GitHub Desktop.
Save aqzlpm11/2a2bdf5a3f5c684a502b8dd5f64c0310 to your computer and use it in GitHub Desktop.
Hive import csv demo
from pyhive import hive# or import hive
from pyhdfs import HdfsClient
data_csv = '/root/demo/data/train.csv'
hdfs = HdfsClient('hadoopmaster.hadoop.test', user_name='admin')
hdfs.copy_from_local(data_csv, '/user/admin/data/titanic/train.csv')
hive_cursor= hive.connect('hadoopslave1.hadoop.test', username='admin').cursor()
sql = r'''
CREATE TABLE tmp (
PassengerId INT,
Survived INT,
Pclass INT,
Name STRING,
Sex STRING,
Age INT,
SibSp INT,
Parch INT,
Ticket STRING,
Fare DOUBLE,
Cabin STRING,
Embarked STRING
)
row format serde
'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with
SERDEPROPERTIES
("separatorChar"=",","quotechar"="\"")
STORED AS TEXTFILE
'''
print(sql)
hive_cursor.execute(sql)
sql = """
CREATE TABLE `titanic` (
PassengerId INT,
Survived INT,
Pclass INT,
Name STRING,
Sex STRING,
Age INT,
SibSp INT,
Parch INT,
Ticket STRING,
Fare DOUBLE,
Cabin STRING,
Embarked STRING
)
STORED AS ORC
"""
print(sql)
hive_cursor.execute(sql)
sql = "load data inpath '/user/admin/data/titanic/train.csv' OVERWRITE into table tmp"
print(sql)
hive_cursor.execute(sql)
sql = 'FROM `tmp` INSERT INTO TABLE `titanic` SELECT *'
print(sql)
hive_cursor.execute(sql)
sql = 'DROP TABLE `tmp`'
print(sql)
hive_cursor.execute(sql)
#print cursor.fetchone()
#print cursor.fetchall()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment