Skip to content

Instantly share code, notes, and snippets.

@xsthunder
Created February 28, 2019 05:25
Show Gist options
  • Save xsthunder/40c75b62d857c14a21f18b0b709a14c3 to your computer and use it in GitHub Desktop.
Save xsthunder/40c75b62d857c14a21f18b0b709a14c3 to your computer and use it in GitHub Desktop.
Split a string array by regex splitters and count value frequencies
import pandas as pd
def conlusion_hitter_df(arr):
    """Count how often each value occurs in *arr* and return a frequency table.

    Args:
        arr: Iterable of hashable values (strings in this script).

    Returns:
        A ``pandas.DataFrame`` indexed by the distinct values, with a single
        column named ``0`` holding the occurrence counts, sorted from the
        most frequent value to the least frequent. An empty input yields an
        empty frame that still has column ``0``.
    """
    # Local import keeps the block self-contained; the file does not import
    # collections at module level.
    from collections import Counter

    counts = Counter(arr)
    # Build the frame directly instead of via DataFrame.from_items, which was
    # removed in pandas 1.0, followed by a transpose.
    df = pd.DataFrame({0: list(counts.values())}, index=list(counts))
    # A stable sort keeps the order of equal counts deterministic.
    return df.sort_values(by=0, ascending=False, kind="mergesort")
def flatten_array(array):
    """Flatten one level of nesting: concatenate every list found in *array*.

    Elements of *array* that are not ``list`` instances are skipped, not
    copied into the result.

    Args:
        array: Iterable whose elements are expected to be lists.

    Returns:
        A new list containing the items of each inner list, in order.
    """
    # A comprehension avoids relying on `reduce`, which is not a builtin in
    # Python 3 and is not imported in this file.
    return [item for chunk in array if isinstance(chunk, list) for item in chunk]
# NOTE(review): obj_all is not defined anywhere in this file — presumably a
# dict whose values are lists of strings, created earlier in the session;
# confirm. Values that are not lists are dropped by flatten_array.
arr = flatten_array(obj_all.values())
import re
# Regex patterns used as separators, applied in this order: a digit followed
# by a colon, then a comma, then a semicolon.
spliters = [r'\d:', ',', ';']
def apply_string_spilters(spliters, array):
    """Split every string in *array* by each regex in *spliters*, in order.

    Each pattern is applied to the pieces produced by the previous one, and
    the result is kept flat (a list of strings) after every pass. Empty
    pieces produced by ``re.split`` are kept.

    Args:
        spliters: Iterable of regular-expression patterns.
        array: Iterable of strings to split. It is not modified.

    Returns:
        A new flat list of the resulting string pieces.
    """
    pieces = list(array)
    for spliter in spliters:
        # Compile once per pattern, then split and flatten in a single pass
        # instead of building nested lists and flattening them afterwards.
        pattern = re.compile(spliter)
        pieces = [part for text in pieces for part in pattern.split(text)]
    return pieces
def filter_nonempty_string(array):
    """Return a new list with only the non-empty ``str`` items of *array*.

    Items that are not strings, and strings of length zero, are left out.
    The relative order of the kept items is preserved.
    """
    kept = []
    for entry in list(array):
        if not isinstance(entry, str):
            continue
        if len(entry) > 0:
            kept.append(entry)
    return kept
def apply_rules_on_array(arr):
    """Normalise raw strings into clean tokens.

    Steps, in order:
      1. split every string with the module-level ``spliters`` patterns,
      2. strip surrounding whitespace from each piece,
      3. drop empty pieces,
      4. drop pieces that contain a ``'?'`` character.

    Args:
        arr: Iterable of strings. It is not modified.

    Returns:
        A new list of cleaned, non-empty strings without ``'?'``.
    """
    pieces = apply_string_spilters(spliters, arr)
    stripped = filter_nonempty_string([piece.strip() for piece in pieces])
    # The '?' filter used to be applied twice in a row with an identical
    # predicate; a single pass gives the same result.
    # NOTE(review): if the second pass was meant to target another character
    # (e.g. a full-width question mark), add that check here — confirm.
    return [piece for piece in stripped if '?' not in piece]
# Export the frequency table of the raw (un-split) values.
# NOTE(review): this writes to the same path as the export below, so this
# first workbook is overwritten as soon as the second to_excel call runs —
# confirm whether a different filename was intended here.
conlusion_hitter_df( arr ).to_excel('counter_apply_spliter.xlsx')
# Split, trim and filter the values, then export the recomputed table.
arr = apply_rules_on_array(arr)
conlusion_hitter_df( arr ).to_excel('counter_apply_spliter.xlsx')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment