Skip to content

Instantly share code, notes, and snippets.

View RaMSFT's full-sized avatar

RaMS Nelluri RaMSFT

View GitHub Profile
import re
import timeit
def count_vowels_loop_over_vowels(text):
vowel_count = 0
for vowel in vowels_list:
count = text.count(vowel)
vowel_count += count
## Import pandas
import pandas as pd
## Name of the CSV file to load
split_source_file = 'z_flights_info.csv'
## Load the CSV into a pandas DataFrame: comma-separated, first row is the header
df = pd.read_csv(split_source_file, sep=",", header=0)
## Extract only the columns of the DataFrame
import requests
import http.client
def apicall_using_requests():
url = "https://api.postcodes.io/random/postcodes"
payload={}
headers = {}
response = requests.request("GET", url, headers=headers, data=payload)
# Build list_of_cars: a Python list of (id, brand) tuples, with ids numbered from 1
list_of_cars = list(enumerate(
    ['Honda', 'BMW', 'Ford', 'Toyota', 'Skoda', 'Mercedes', 'Tata', 'Audi'],
    start=1,
))
# Column names, in the same order as the tuple fields above
columns = ['id', 'car_brands']
# Create the DataFrame using the createDataFrame method
list_dataframe = spark.createDataFrame(list_of_cars, columns)
# Show the data from dataframe
from faker import Faker
import pandas as pd
def customer_data(out_file, country, number_of_records):
out_file_name = ''
fake = Faker(country)
data_list = []
columns_list = ['id', 'FirstName', 'LastName','Gender', 'DateOfBirth','PhoneNumber','Occupation','Company','PersonalEmail','OfficialEmail','BSNNumber','IBAN','HouseNumber','StreetName','PostCode','City','Country']
for val in range(number_of_records):
data_list.append([fake.unique.random_int(), fake.first_name(), fake.last_name(), fake.profile()['sex'], fake.date_of_birth(),fake.phone_number(), fake.job(), fake.company(), fake.email(), fake.company_email(), fake.ssn(), fake.iban(), fake.building_number(),fake.street_name(), fake.postcode(), fake.city(), 'Netherlands'])
We can make this file beautiful and searchable if this error is corrected: It looks like row 6 should actually have 17 columns, instead of 6 in line 5.
id,FirstName,LastName,Gender,DateOfBirth,PhoneNumber,Occupation,Company,PersonalEmail,OfficialEmail,BSNNumber,IBAN,HouseNumber,StreetName,PostCode,City,Country
3735,Wessel,Vermeulen,F,1940-06-22,+31512-531714,Video editor,Alliander,[email protected],[email protected],940178655,NL08ZILK1351375430,819,Jortboulevard,8888 BU,Sint Hubert,Netherlands
8293,Fien,Welf,M,1984-03-30,+31(0)40-3189349,"Merchandiser, retail",Caldic,[email protected],[email protected],496235813,NL43WDTA3651245147,23,Guusjehof,1414 QY,Echtenerbrug,Netherlands
5479,Thijmen,Westermann,F,1995-04-13,+31225 804072,Commercial art gallery manager,Stichting Zaal,[email protected],[email protected],981063457,NL10LFUW1979776303,55,Livweg,4252VE,Maasbommel,Netherlands
4250,Helena,Klomp Jan,M,1978-05-22,+31(0)22-5501449,Restaurant manager,Wildschut BV,[email protected],[email protected],813645074,NL73QFPU3196932371,254,Nynkedreef,3154PM,Roelofarendsveen,Netherlands
8791,Loïs,van der Schuijt,F,1949-09-17,+31(0)41
We can make this file beautiful and searchable if this error is corrected: It looks like row 6 should actually have 17 columns, instead of 6 in line 5.
id,FirstName,LastName,Gender,DateOfBirth,PhoneNumber,Occupation,Company,PersonalEmail,OfficialEmail,BSNNumber,IBAN,HouseNumber,StreetName,PostCode,City,Country
3735,Wessel,Vermeulen,F,1940-06-22,+31512-531714,Video editor,Alliander,[email protected],[email protected],940178655,NL08ZILK1351375430,819,Jortboulevard,8888 BU,Sint Hubert,Netherlands
8293,Fien,Welf,M,1984-03-30,+31(0)40-3189349,"Merchandiser, retail",Caldic,[email protected],[email protected],496235813,NL43WDTA3651245147,23,Guusjehof,1414 QY,Echtenerbrug,Netherlands
5479,Thijmen,Westermann,F,1995-04-13,+31225 804072,Commercial art gallery manager,Stichting Zaal,[email protected],[email protected],981063457,NL10LFUW1979776303,55,Livweg,4252VE,Maasbommel,Netherlands
4250,Helena,Klomp Jan,M,1978-05-22,+31(0)22-5501449,Restaurant manager,Wildschut BV,[email protected],[email protected],813645074,NL73QFPU3196932371,254,Nynkedreef,3154PM,Roelofarendsveen,Netherlands
8791,Loïs,van der Schuijt,F,1949-09-17,+31(0)41
# Seed tbl_df with a "show tables" result to union onto (kind of a hack:
# presumably no table in user_data matches 'xxx', so the result is empty)
tbl_df = spark.sql("show tables in user_data like 'xxx'")
# Visit every database whose name matches the pattern '*data'
for db in spark.sql("show databases like '*data'").collect():
    # List the tables of the current database
    df = spark.sql(f"show tables in {db.databaseName}")
    # Append this database's table list to the accumulated DataFrame
    tbl_df = tbl_df.union(df)
#After the loop, show the results
import http
import json
def call_publicapi(table_name):
"""
purpose:
This function does bla bla bla.
params: reads 3 global parameters
status, execution_log: for status and logging
database name: for creating database
response: