Skip to content

Instantly share code, notes, and snippets.

@dubeyji10
Created June 6, 2022 08:17
Show Gist options
  • Save dubeyji10/5297a404aad7319bb87f176c89c72b32 to your computer and use it in GitHub Desktop.
Save dubeyji10/5297a404aad7319bb87f176c89c72b32 to your computer and use it in GitHub Desktop.
shardded jsons of 100 records each - clients table
print('iterate with index - steps of 100')
import mysql.connector
from mysql.connector import Error
import logging
import re
import time
from datetime import datetime , timedelta
import json
now = datetime.now()
fileName = now.strftime('%Y_%m_%d_%H_%M_%S')
logging.basicConfig(filename="logs\\shardingJSONS_2_"+fileName+"_LOGS.log", level=logging.INFO, datefmt='%Y-%m-%d %H:%M:%S')
startTime = now
print('script started at : {}'.format(startTime.strftime('%Y-%m-%d %H:%M:%S')))
logging.info('script started at : {}'.format(startTime.strftime('%Y-%m-%d %H:%M:%S')))
print('creating shards of huge amount of data\n')
sizeOfShard = 100
index = 0
counter = 0
#lastIndex = 479,300
lastIndex = 80000 # test with clients table ~ around 80k records
alpha = re.compile(r"[a-z|A-Z]|[!@#$%^&*/]")
clients = 'select * from clients'
leads = 'select * from leads'
conversation = 'select * from conversation'
invoiceInfo = 'select * from invoice_info'
invoiceItems = 'select * from invoice_items'
#
# if query gives null response add condition
#
# try an outrage query
# build condtion logic for that response
#
for i in range(index , lastIndex ,sizeOfShard):
query = ''
print(' ------------- {} -------------- '.format(counter))
logging.info(' ------------- {} -------------- '.format(counter))
# print('{} -> running for limit {},{}'.format(counter,i,sizeOfShard))
print('{} -> limit {} OFFSET {}'.format(counter,sizeOfShard , i))
offSet = ' LIMIT {} OFFSET {} '.format(sizeOfShard,i)
with mysql.connector.connect() as connectPointer:
with connectPointer.cursor() as aCursor:
query = clients+offSet
print('running - ',query)
print('1. fetching clients')
logging.info('\t1. fetching clients : {}'.format(query))
aCursor.execute(query)
result = aCursor.fetchall()
if not result:
print('--> no clients left in table <--')
logging.info('__________ no clients created left to write __________')
else:
print('writing clients to json')
clientsFile = 'clients\\clients_index_{}.json'.format(i)
# ----------------------- fields to json items ---------------------------
client_collection_list = []
for rows in result:
client_collection = {}
# convert it to string otherwise it raises error
local_id = rows[0]
cp1 = rows[1]
cp2 = rows[2]
name = rows[3] # company name - Name - since not null guaranteed entry
company_grade = rows[4]
phoneNo = str(rows[5]) # error in db remove if not a valid number
mn_1 = rows[6]
mn_2 = rows[7]
email = rows[8]
designation = rows[9]
address = str(rows[10]) # remove special chars from it - results in unvalidated json
website = rows[11]
user_id = rows[12]
added_on = str(rows[13])
last_conv = rows[14]
followup_on = str(rows[15])
last_mail_no = rows[16]
last_mail_on =str(rows[17])
last_sms_no = rows[18]
last_sms_on = str(rows[19])
country = rows[21]
if not bool(re.search(alpha,phoneNo)):
client_collection['phone_no'] = phoneNo
# print("{ 'phone_no' : ",phoneNo,"}")
else:
client_collection['phone_no'] = None
# print("{ 'phone_no' : ","'null'","}")
client_collection['Name'] = name
client_collection['company_name'] = name
client_collection['user_id'] = user_id
client_collection['local_id'] = local_id
client_collection['added_on'] = added_on.replace(' ','T') + '+05:30'
if len(followup_on)<5:
client_collection['followup_on'] = None
else:
client_collection['followup_on'] = followup_on.replace(' ','T') + '+05:30'
if len(last_mail_on)<5:
client_collection['last_mail_on'] = None
else:
client_collection['last_mail_on'] = last_mail_on.replace(' ','T') + '+05:30'
if len(last_sms_on)<5:
client_collection['last_sms_on'] = None
else:
client_collection['last_sms_on'] = last_sms_on.replace(' ' ,'T') + '+05:30'
client_collection['company_grade'] = company_grade
client_collection['website'] = website
client_collection['last_conversation'] = last_conv
client_collection['country'] = country
client_collection['address'] = re.sub('[^A-Za-z0-9]' , ' ',address)
client_collection['mobile_no1'] = mn_1
client_collection['mobile_no2'] = mn_2
client_collection['contact_person1'] = cp1
client_collection['contact_person2'] = cp2
del name
del user_id
del local_id
del added_on
del company_grade
del phoneNo
del mn_1
del mn_2
del address
del website
del followup_on
del last_mail_no
del last_mail_on
del last_sms_on
del last_sms_no
del cp1
del cp2
# print(json.dumps(client_collection))
client_collection_list.append(client_collection)
# payloadJsons\\clients_1_temp.json - read this payload to push
clientsFile = 'clients/clients_index_{}.json'.format(i)
with open(clientsFile,'w',encoding='utf-8') as clientJson:
clientJson.write(json.dumps(
{"data":client_collection_list},indent=4,default=str,sort_keys=True
))
print(f' . .. check {clientsFile}')
logging.info(f'\t\t . .. check {clientsFile}')
time.sleep(2)
counter+=1
now = datetime.now()
endTime = now
print('script ended at : {}'.format(endTime.strftime('%Y-%m-%d %H:%M:%S')))
logging.info('script ended at : {}'.format(endTime.strftime('%Y-%m-%d %H:%M:%S')))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment