Created
February 28, 2019 05:25
-
-
Save xsthunder/40c75b62d857c14a21f18b0b709a14c3 to your computer and use it in GitHub Desktop.
Split an array of strings by regex splitters and count the frequency of each value
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
def conlusion_hitter_df(arr):
    """Count how often each value occurs in ``arr`` and tabulate the counts.

    Args:
        arr: Iterable of hashable values.

    Returns:
        A ``pandas.DataFrame`` indexed by the distinct values of ``arr`` with
        a single column labelled ``0`` holding each value's number of
        occurrences, sorted by that column in descending order.
    """
    # Imported locally so this function does not rely on a file-level import.
    from collections import Counter

    counts = Counter(arr)
    # Build the value -> count table directly; ``DataFrame.from_items`` was
    # removed in pandas 1.0, and the explicit column keeps ``0`` present even
    # when ``arr`` is empty so the sort below still works.
    df = pd.DataFrame(
        {0: list(counts.values())},
        index=list(counts.keys()),
    )
    return df.sort_values(by=0, ascending=False)
def flatten_array(array):
    """Flatten ``array`` by exactly one level.

    Args:
        array: Iterable whose elements are expected to be lists.

    Returns:
        A new list holding the items of every ``list`` element of ``array``,
        in order. Elements that are not ``list`` instances are dropped, and
        nested lists inside the elements are not flattened further.
    """
    # A comprehension avoids ``reduce``, which is not a builtin in Python 3.
    return [item for sub in array if isinstance(sub, list) for item in sub]
# Merge the values of ``obj_all`` into one flat list.
# NOTE(review): ``obj_all`` is not defined in this file; presumably a dict of
# lists left over from earlier notebook state. Confirm before running.
arr = flatten_array(obj_all.values())
import re | |
# Separators applied, in this order, to every string. Each entry is used as a
# regular-expression pattern by apply_string_spilters.
# NOTE(review): r'\d:' matches a single digit before the colon, so "10:" leaves
# a stray "1" fragment. Confirm whether r'\d+:' was intended.
spliters = [
    r'\d:',
    ',',
    ';',
]
def apply_string_spilters(spliters, array):
    """Split every string in ``array`` by each splitter in turn.

    Args:
        spliters: Iterable of regular-expression patterns, applied in order.
        array: Iterable of strings to split. It is not modified.

    Returns:
        A new flat list of the fragments that remain after all splitters have
        been applied. Empty fragments are kept.
    """
    pieces = list(array)
    for spliter in spliters:
        # Compile once per splitter instead of once per string.
        pattern = re.compile(spliter)
        # ``split`` always returns a list, so the results can be flattened
        # inline as they are produced.
        pieces = [part for text in pieces for part in pattern.split(text)]
    return pieces
def filter_nonempty_string(array):
    """Keep only the non-empty ``str`` elements of ``array``.

    Args:
        array: Iterable of arbitrary values.

    Returns:
        A new list with the elements of ``array`` that are ``str`` instances
        of length greater than zero, in their original order.
    """
    return [item for item in array if isinstance(item, str) and len(item) > 0]
def apply_rules_on_array(arr):
    """Split, trim and filter the strings in ``arr``.

    The strings are split by the module-level ``spliters``, every fragment is
    stripped of surrounding whitespace, and fragments that are empty or that
    contain ``'?'`` are discarded.

    Args:
        arr: Iterable of strings.

    Returns:
        A new list with the cleaned fragments.
    """
    fragments = apply_string_spilters(spliters, arr)
    stripped = [fragment.strip() for fragment in fragments]
    kept = filter_nonempty_string(stripped)
    # NOTE(review): this '?' filter used to be applied twice with identical
    # text; the duplicate had no effect and was removed. One of the two may
    # originally have targeted the full-width question mark. Confirm.
    return [fragment for fragment in kept if '?' not in fragment]
# Clean the collected strings, then export their frequency table.
# NOTE(review): an earlier export of the uncleaned counts to this same path was
# removed because this write overwrote it immediately. Use a distinct filename
# if that table is also wanted.
arr = apply_rules_on_array(arr)
conlusion_hitter_df(arr).to_excel('counter_apply_spliter.xlsx')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment