Pradeep Singh mepsrajput

🎯

Focusing

ML Engineer

mepsrajput / simple_freq_procedure.sas

Last active April 14, 2022 14:52

Simple proc freq

	/* Point a fileref at the CSV, then import it into WORK.Gov_C_SAS */
	filename Gov_C "/folders/myfolders/Assignments/governors_county.csv";

	/* GETNAMES=YES takes the variable names from the first row of the file */
	proc import datafile=Gov_C out=WORK.Gov_C_SAS dbms=csv;
		getnames=yes;
	run;

	/* freq procedure */
	proc freq data=Gov_C_SAS;

mepsrajput / simple_freq_procedure_python.py

Last active April 14, 2022 14:29

	# Import Pandas
	import pandas as pd

	# Load the county-level governors results into a DataFrame
	data = pd.read_csv("../input/us-election-2020/governors_county.csv")

	# Count rows per state, then order the counts by state label
	datax = (
		data["state"]
		.value_counts()
		.sort_index()
	)

	# Create a dataframe

mepsrajput / proc_freq_descending.sas

Created April 14, 2022 14:40

	/* One-way frequency table for state, rows sorted by descending count */
	PROC FREQ DATA=Gov_C_SAS ORDER=FREQ;
		TABLES state;
	RUN;

mepsrajput / proc_freq_descending.py

Created April 14, 2022 14:42

	# Frequency table for `state`, mirroring PROC FREQ with ORDER=FREQ:
	# value_counts() returns the counts sorted from most to least frequent.
	datax = data['state'].value_counts()

	# Column labels match the ones used by the other frequency tables in this
	# file ('Cumulative Frequency' / 'Cumulative Percent'); the previous labels
	# were missing their final letter.
	datay = pd.DataFrame({
		'state': datax.index,
		'Frequency': datax.values,
		# Share of all rows, as a percentage rounded to two decimals
		'Percent': ((datax.values/datax.values.sum())*100).round(2),
		# Running totals follow the descending-count row order above
		'Cumulative Frequency': datax.values.cumsum(),
		'Cumulative Percent': ((datax.values.cumsum()/datax.values.sum())*100).round(2),
	})

	# Bare expression so a notebook cell renders the table
	datay

mepsrajput / proc_freq_options.sas

Created April 14, 2022 14:45

	/* Counts only: NOPERCENT and NOCUM suppress the percent and cumulative columns */
	PROC FREQ DATA=Gov_C_SAS;
		TABLES state / NOPERCENT NOCUM;
	RUN;

mepsrajput / proc_freq_options.py

Last active April 14, 2022 14:48

	# Per-state counts ordered by state label; no percent or cumulative columns
	datax = data["state"].value_counts().sort_index()
	datay = pd.DataFrame({"state": datax.index, "Frequency": datax.values})

	# Bare expression so a notebook cell renders the table
	datay

mepsrajput / freq_cross_tab.sas

Created April 14, 2022 14:50

	/* county-by-state crosstab; row, column and overall percentages are suppressed */
	PROC FREQ DATA=Gov_C_SAS;
		TABLES county*state / NOROW NOCOL NOPERCENT;
	RUN;

mepsrajput / freq_crosstab.py

Created April 14, 2022 14:51

	# County-by-state contingency table of counts, with a 'Total' row and column
	datab = pd.crosstab(
		index=data["county"],
		columns=data["state"],
		margins=True,
		margins_name="Total",
	)

	# Bare expression so a notebook cell renders the table
	datab

mepsrajput / freq_procedure_with_missing.sas

Last active April 14, 2022 16:35

mepsrajput / proc_freq_with_missing.py

Created April 14, 2022 16:40

	# Frequency table for GENRE that keeps missing values as their own row
	# (dropna=False), ordered by descending count.
	datax = data["GENRE"].value_counts(dropna=False)

	datay = pd.DataFrame(
		{
			"GENRE": datax.index,
			"Frequency": datax.values,
			# Share of all rows, as a percentage rounded to two decimals
			"Percent": ((datax.values / datax.values.sum()) * 100).round(2),
			# Running totals follow the row order of the counts above
			"Cumulative Frequency": datax.values.cumsum(),
			"Cumulative Percent": (
				(datax.values.cumsum() / datax.values.sum()) * 100
			).round(2),
		}
	)

	# Bare expression so a notebook cell renders the table
	datay