prasku5 · May 4, 2018 05:43
diff --git a/Sqoop_Import_Split_By b/Sqoop_Import_Split_By
 sqoop import \   
 --connect jdbc:mysql://localhost/source_database_name \ (JDBC connection URL used to reach the source database)
 --username <username> \                                 (Source Database Username)
 --password <password> \                                 (Source Database password)
 --database source_database_name \                       (The database name becomes the folder name in the target HDFS location)
 --target-dir <path of the directory> \
 --hive-import \
 --hive-table query_import \
 --boundary-query 'SELECT 0, MAX(id) FROM a' \           (The boundary query tells Sqoop the lower and upper bounds of the split column, i.e. the range of records that take part in the import)
 --query 'SELECT a.id, a.name, b.id, b.name FROM a, b WHERE a.id = b.id AND $CONDITIONS'\
 --num-mappers 3 \                                       (This controls the parallelism of the import; choose the number of mappers with care)
 --split-by a.id \                                       (we are splitting some column for our performance and it
	sqoop import \
	--connect jdbc:mysql://localhost/source_database_name \ (JDBC connection URL used to reach the source database)
	--username <username> \ (Source Database Username)
	--password <password> \ (Source Database password)
	--database source_database_name \ (The database name becomes the folder name in the target HDFS location)
	--target-dir <path of the directory> \
	--hive-import \
	--hive-table query_import \
	--boundary-query 'SELECT 0, MAX(id) FROM a' \ (The boundary query tells Sqoop the lower and upper bounds of the split column, i.e. the range of records that take part in the import)
	--query 'SELECT a.id, a.name, b.id, b.name FROM a, b WHERE a.id = b.id AND $CONDITIONS'\
	--num-mappers 3 \ (This controls the parallelism of the import; choose the number of mappers with care)
	--split-by a.id \ (we are splitting some column for our performance and it