pavelgordon · March 25, 2020 19:31
diff --git a/gist.py b/gist.py
 import pandas as pd
 import numpy
 # Count how often each distinct cell value occurs in the workbook and print
 # the 20 most frequent values as (value, count) pairs.
 df = pd.read_excel('dwfef.xlsx')

 # Flatten the 2-dimensional frame into a 1-dimensional array of cell values.
 arr = df.to_numpy().flatten()
 # Drop missing cells (NaN/None), i.e. cells that were empty in the sheet.
 arr = arr[~pd.isnull(arr)]


 def striper(t):
     """Return the text form of *t* without leading/trailing whitespace."""
     # Convert to str first: numeric or date cells have no .strip() method.
     return str(t).strip()


 # The module is imported as plain ``numpy``; there is no ``np`` alias in scope.
 arr = numpy.array([striper(xi) for xi in arr]).astype(str)
 unique_elements, counts_elements = numpy.unique(arr, return_counts=True)

 # numpy.unique returns the values in ascending order; the stable descending
 # sort by count keeps equally frequent values in that order.
 zipped = list(zip(unique_elements, counts_elements))
 zipped.sort(key=lambda pair: pair[1], reverse=True)
 print(zipped[0:20])
	import pandas as pd
	import numpy
	# Count how often each distinct cell value occurs in the workbook and print
	# the 20 most frequent values as (value, count) pairs.
	df = pd.read_excel('dwfef.xlsx')

	# Flatten the 2-dimensional frame into a 1-dimensional array of cell values.
	arr = df.to_numpy().flatten()
	# Drop missing cells (NaN/None), i.e. cells that were empty in the sheet.
	arr = arr[~pd.isnull(arr)]


	def striper(t):
		"""Return the text form of *t* without leading/trailing whitespace."""
		# Convert to str first: numeric or date cells have no .strip() method.
		return str(t).strip()


	# The module is imported as plain ``numpy``; there is no ``np`` alias in scope.
	arr = numpy.array([striper(xi) for xi in arr]).astype(str)
	unique_elements, counts_elements = numpy.unique(arr, return_counts=True)

	# numpy.unique returns the values in ascending order; the stable descending
	# sort by count keeps equally frequent values in that order.
	zipped = list(zip(unique_elements, counts_elements))
	zipped.sort(key=lambda pair: pair[1], reverse=True)
	print(zipped[0:20])