Skip to content

Instantly share code, notes, and snippets.

@TheWaWaR
Last active August 29, 2015 14:18
Show Gist options
  • Save TheWaWaR/855086f2d74f3d0a0931 to your computer and use it in GitHub Desktop.
Save TheWaWaR/855086f2d74f3d0a0931 to your computer and use it in GitHub Desktop.
Mysql dump python script (为了省内存,分片将 MySQL 表导出为 CSV 格式。)
#!/usr/bin/env python
#coding: utf-8
u"""
为了省内存,分片将 MySQL 表导出为 CSV 格式。
"""
import os
import argparse
from datetime import datetime
import pymysql as mdb
SELECT_SQL_TMPL = 'SELECT * FROM {} WHERE id>{} ORDER BY id ASC LIMIT {}'
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('-t', '--table', required=True, help='Target table name')
parser.add_argument('-l', '--limit', type=int, default=5000, help='The limit number of one select')
parser.add_argument('-o', '--output', help='Output file name')
parser.add_argument('-H', '--host', default='127.0.0.1', help='Mysql host')
parser.add_argument('-P', '--port', type=int, default=3306, help='Mysql host')
parser.add_argument('-d', '--database', required=True, help='Mysql database')
parser.add_argument('-u', '--user', required=True, help='Mysql user')
parser.add_argument('-p', '--passwd', required=True, help='Mysql password')
args = parser.parse_args()
if not args.output:
args.output = '.'.join([args.table, 'csv'])
if os.path.exists(args.output):
parser.error('File exists: {}'.format(args.output))
print 'Args:', args
return args
def get_rows(conn, table, max_id, limit):
c = conn.cursor()
c.execute(SELECT_SQL_TMPL.format(table, max_id, limit))
rows = c.fetchall()
c.close()
return rows
def main():
args = parse_args()
conn = mdb.connect(host=args.host, port=args.port,
database=args.database,
user=args.user, passwd=args.passwd)
print '{}, >>> Start!'.format(datetime.now())
max_id = 0
count = 0
fw = open(args.output, 'w')
while True:
rows = get_rows(conn, args.table, max_id, args.limit)
if not rows:
break
count += len(rows)
max_id = int(rows[-1][0])
print '{}, Count: {}, Max-id: {}'.format(datetime.now(), count, max_id)
for row in rows:
fw.write('\t'.join([str(col) for col in row])+'\n')
fw.flush()
fw.close()
conn.close()
print '{}, >>> DONE!'.format(datetime.now())
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment