Skip to content

Instantly share code, notes, and snippets.

View RaMSFT's full-sized avatar

RaMS Nelluri RaMSFT

View GitHub Profile
## Import the regular expression module - used to strip every character that is not a letter, digit, space, or newline
import re
## Input
giventext = "This is Medium article presented by ramstkp in the month of October. On the day of writing it was cold, and autumn started early this in the october month. October month is relatively less cold compared to winter months"
## Remove every character other than letters, digits, spaces, and newlines
giventext = re.sub('[^a-zA-Z0-9 \n]', '', giventext)
## Convert to lowercase and split the text into a list of words (split on spaces)
@RaMSFT
RaMSFT / word_count_loop
Last active October 14, 2021 06:59
This snippet of code finds the most repeated words in a given text
## Import the regular expression module - used to strip every character that is not a letter, digit, space, or newline
import re
## Input
giventext = "This is Medium article presented by ramstkp in the month of October. On the day of writing it was cold, and autumn started early this in the october month. October month is relatively less cold compared to winter months"
## Remove every character other than letters, digits, spaces, and newlines
giventext = re.sub('[^a-zA-Z0-9 \n]', '', giventext)
## Convert to lowercase and split the text into a list of words (split on spaces)
@RaMSFT
RaMSFT / word_count_counter.py
Created October 14, 2021 07:21
Most Common words using counter method
## Import the regular expression module - used to strip every character that is not a letter, digit, space, or newline
import re
## Import Counter from collections
from collections import Counter
## Input
giventext = "This is Medium article presented by ramstkp in the month of October. On the day of writing it was cold, and autumn started early this in the october month. October month is relatively less cold compared to winter months"
## Remove every character other than letters, digits, spaces, and newlines
giventext = re.sub('[^a-zA-Z0-9 \n]', '', giventext)
## Import Pandas to make dataframe
import pandas as pd
## Import the regular expression module - used to strip every character that is not a letter, digit, space, or newline
import re
## Import datetime from the datetime module
from datetime import datetime
## Input
@RaMSFT
RaMSFT / mount_credentials_passthrough.py
Last active October 26, 2021 07:21
This piece of code mounts an ADLS Gen2 container / directory in Databricks
#Don't change configs
configs = {
"fs.azure.account.auth.type": "CustomAccessToken",
"fs.azure.account.custom.token.provider.class": spark.conf.get("spark.databricks.passthrough.adls.gen2.tokenProviderClassName")
}
"""
The following details are needed from ADLS:
1. Your container name (optionally, the corresponding directory name)
2. Your Storage account Name
## Import lit from pyspark.sql.functions - useful for adding a constant-valued column with withColumn
from pyspark.sql.functions import lit
## Provide the mount path, including the directory where the files exist
mount_path = '/mnt/<Your mount name>/<directory>'
## loop through the files
for file in dbutils.fs.ls(mount_path):
## This could be better with defining a schema
if 'flights1.csv' in file.name:
## Provide the mount path, including the directory where the files exist
mount_path = '/mnt/<mount name>/<directory>'
spark.sql(f"create table flights_data_2 using csv location '{mount_path}/*.csv' options(header 'true', inferSchema 'true', sep ',')")
## run a group by command on registered table
resultdf = spark.sql("select input_file_name() as filename, count(*) from flights_data_2 group by filename")
resultdf.display()
import pandas as pd
import random
## Provide file name with path for example: "C:\Users\xxxxx\flights.csv"
split_source_file = input("File Name with absolute Path? : ")
## find number of lines using Pandas
pd_dataframe = pd.read_csv(split_source_file, header=0)
number_of_rows = len(pd_dataframe.index) + 1
import pandas as pd
import timeit
def count_lines_enumrate_list(file_name):
    """Return the zero-based index of the last line of a text file.

    The file is read through ``list(enumerate(fp))`` and the index of the
    final entry is returned, i.e. the number of lines minus one.

    Args:
        file_name: Path of the file to read; it is opened in text mode ('r').

    Returns:
        int: Index of the last line, or -1 when the file contains no lines.
    """
    # NOTE(review): despite the "count_lines" name, the result is one less
    # than the total number of lines -- confirm what callers expect before
    # changing it.
    # The with-block guarantees the handle is closed, even if reading fails.
    with open(file_name, 'r') as fp:
        indexed_lines = list(enumerate(fp))
    # An empty file yields an empty list; indexing [-1] would raise IndexError.
    if not indexed_lines:
        return -1
    return indexed_lines[-1][0]
import random
## Define list of operators required
operators = ['+','-','*','/']
## generate random numbers based on random complexity counter
def get_random_numbers(random_complexity):
num1 = random.randint(1, random_complexity)
num2 = random.randint(1, random_complexity)