Created
May 12, 2017 11:59
-
-
Save bobquest33/be7c6898d0d94c04cead754d2b7a311a to your computer and use it in GitHub Desktop.
The code below uses the Faker library, which provides many functions for generating random values such as addresses, people's names, telephone numbers and various other types of data. I combined Faker's output with BIC (SWIFT) codes picked at random from a BIC list to build the test data.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from faker import Faker | |
| from random import randint | |
| import pycountry | |
| import pandas as pd | |
# One Faker generator shared by every field produced in this script.
fake = Faker()

# BIC reference data; only its "swift" column is used, as the pool from
# which a random SWIFT address is picked for each generated record.
df = pd.read_csv("total_bic.csv")
swift_bics = df["swift"].tolist()

# Module-level record dictionary.
val = dict()
# Generate a random number with N digits
def random_with_N_digits(n):
    """Return a random integer that has exactly ``n`` decimal digits.

    Args:
        n: Number of digits wanted; must be at least 1.

    Returns:
        An int in the inclusive range ``10**(n-1)`` .. ``10**n - 1``.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        # For n < 1 the lower bound 10 ** (n - 1) is no longer an integer
        # (0.1 for n == 0), which makes randint fail with an error that does
        # not mention the real cause. Reject the input explicitly instead.
        raise ValueError("n must be at least 1, got %r" % (n,))
    lowest = 10 ** (n - 1)
    highest = 10 ** n - 1
    return randint(lowest, highest)
# Function to generate the test data for the POC that has to be masked
try:
    _text_type = unicode  # Python 2
except NameError:
    _text_type = str  # Python 3: str is already Unicode text


def _country_name(alpha_2):
    """Return pycountry's name for ``alpha_2``, or '' when it is unknown."""
    try:
        country = pycountry.countries.get(alpha_2=alpha_2)
    except KeyError:
        # NOTE(review): some pycountry releases raise KeyError for an unknown
        # code while others return None -- confirm against the pinned version.
        return ''
    return country.name if country is not None else ''


def gen_rand_test_data():
    """Build one random bank/contact record.

    A fresh dictionary is created on every call, so records returned by
    earlier calls are never modified and a ZIP_CODE / POSTAL_CODE entry from
    a previous record cannot leak into the next one.

    Returns:
        dict: Maps column names to generated values. A record carries
        ``ZIP_CODE`` when ``COUNTRY_CODE`` is "US" and ``POSTAL_CODE``
        otherwise; all other keys are always present.
    """
    record = {}
    # NOTE(review): the country prefix of the account id is drawn
    # independently of COUNTRY_CODE below, and "%010d" left-pads the
    # five-digit number with zeros to ten digits.
    record['ACCOUNT_ID'] = fake.country_code() + "%010d" % random_with_N_digits(5)
    record['TEL_NUM'] = fake.phone_number()
    record['STREET_ADDRESS'] = fake.street_address()
    record['SECONDARY_ADDRESS'] = fake.secondary_address()
    record['CITY'] = fake.city()
    record['COUNTRY_CODE'] = fake.country_code()
    record['COUNTRY'] = _country_name(record['COUNTRY_CODE'])
    if record['COUNTRY_CODE'] == "US":
        record['ZIP_CODE'] = fake.zipcode()
    else:
        record['POSTAL_CODE'] = fake.postalcode()
    # Pick one BIC at random from the list loaded at module level.
    bic = swift_bics[randint(0, len(swift_bics) - 1)]
    record['SWIFT_ADDR'] = _text_type(bic.strip())
    record['EMAIL_ADDR'] = fake.company_email()
    record['CNTCT_PRSN'] = fake.name()
    record['CMPNY_NAME'] = fake.company()
    record['FAX_NUM'] = fake.phone_number()
    return record
# Save the data to CSV
import csv
import codecs

# Column order of the generated CSV. ZIP_CODE and POSTAL_CODE are both listed
# because a record carries only one of them; DictWriter leaves the other
# column empty.
header = [
    'ACCOUNT_ID',
    'TEL_NUM',
    'STREET_ADDRESS',
    'SECONDARY_ADDRESS',
    'CITY',
    'COUNTRY_CODE',
    'COUNTRY',
    'ZIP_CODE',
    'POSTAL_CODE',
    'SWIFT_ADDR',
    'EMAIL_ADDR',
    'CNTCT_PRSN',
    'CMPNY_NAME',
    'FAX_NUM',
]


def _to_ascii_text(value):
    """Drop every non-ASCII character from ``value`` and return text."""
    # Decoding again keeps the result a text string, so the csv module does
    # not write a bytes repr (b'...') on Python 3.
    return value.encode('ascii', 'ignore').decode('ascii')


def write_test_data(path='test_bank_dat.csv', num_rows=10000000,
                    row_factory=None):
    """Write ``num_rows`` generated records to ``path`` as a CSV file.

    Args:
        path: Destination file; it is overwritten.
        num_rows: Number of data rows written after the header row.
        row_factory: Zero-argument callable returning one record dict whose
            keys are all listed in ``header``. Defaults to
            ``gen_rand_test_data``.

    Every value is reduced to its ASCII characters before it is written. A
    value that cannot be converted (for example a non-string) is written
    unchanged and the affected record is printed for inspection.
    """
    if row_factory is None:
        row_factory = gen_rand_test_data
    with codecs.open(path, 'w', 'utf-8', 'ignore') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=header)
        writer.writeheader()
        for _ in range(num_rows):
            row = row_factory()
            for key in row:
                try:
                    row[key] = _to_ascii_text(row[key])
                except (AttributeError, UnicodeError):
                    print(row)
            writer.writerow(row)


if __name__ == "__main__":
    write_test_data()
    print("Test bank Data Created")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment