stanlee321 · March 27, 2020 19:03
diff --git a/plot_top_n_words_pandas_column.py b/plot_top_n_words_pandas_column.py
 import os
 from collections import Counter

 import matplotlib.pyplot as plt
 import pandas as pd
 import seaborn as sns

 # Helper class for plotting value frequencies
 class PlotFrecuency:
     """
     Plot the top N most frequent values of a DataFrame column as a bar chart.

     Typical use is ``PlotFrecuency(path).main(df, target_column=...)``, which
     counts the values of the column, prints the ranking and saves the chart
     as a JPG inside ``path_to_save``.
     """

     def __init__(self, path_to_save):
         """
         Store the output directory and set up plot fonts and defaults.

         Args:
             path_to_save: Directory in which ``plot_word_count`` writes the
                 ``<title>.jpg`` image.
         """
         self.path_to_save = path_to_save

         self.fontTitle = {
             'weight': 'bold',
             'size': 20,
         }
         self.fontY = {
             'weight': 'bold',
             'size': 15,
         }
         # Original (misspelled) attribute name, kept as an alias of fontY so
         # code that reads or tweaks it keeps working.
         self.fonntY = self.fontY
         self.fontX = {
             'weight': 'bold',
             'size': 15,
         }
         self.top20 = None

         # Same defaults as main(), so the helper methods can be called on
         # their own without raising AttributeError.
         self.target_column = "Departamento"
         self.key_name = "Departamento"
         self.key_value = "Frecuencia"
         self.title_x = "Departamento"
         self.title_y = "Frecuencia"
         self.title = "Acciones por departamento"

     def get_sorted_tuits(self, counts, top_n=25):
         """
         Rank the counted values, print the ranking and return it.

         Args:
             counts: Mapping of value -> number of occurrences.
             top_n: Maximum number of entries to keep.

         Returns:
             List of ``(value, count)`` tuples ordered by count and then by
             value, both descending, truncated to ``top_n`` entries.
         """
         sorted_twitts = sorted(
             counts.items(),
             key=lambda kv: (kv[1], kv[0]),
             reverse=True,
         )[:top_n]

         for i, (k, v) in enumerate(sorted_twitts):
             print(str(i) + "-", f"{self.key_name}: ", k, "   |||", f"{self.key_value}: ", v)
         return sorted_twitts

     def create_df_counts(self, sorted_twitts):
         """
         Build a DataFrame from ``(value, count)`` pairs.

         Args:
             sorted_twitts: Sequence of ``(value, count)`` tuples.

         Returns:
             DataFrame indexed by the values, with columns ``label`` (the
             value), ``count`` and ``index`` (position of the pair in the
             input), sorted by ``count`` descending.
         """
         df_x = pd.DataFrame(sorted_twitts, columns=["label", "count"])
         df_x["index"] = df_x.index
         df_x.index = list(df_x["label"])

         # A stable sort keeps the incoming order of rows with equal counts.
         return df_x.sort_values(['count'], ascending=False, kind="mergesort")

     def get_uniques_and_user_names(self, df):
         """
         Select the target column and compute its distinct values.

         Reads ``self.target_column`` and stores the distinct values in
         ``self.uniques``.

         Args:
             df: DataFrame containing the column ``self.target_column``.

         Returns:
             Tuple ``(uniques, column)`` where ``column`` is
             ``df[self.target_column]``.
         """
         user_names = df[self.target_column]
         self.uniques = user_names.unique()
         return self.uniques, user_names

     def plot_word_count(self, df, titley='Number of Comments', titlex="Names", title="Message frec"):
         """
         Draw the bar chart for a counts DataFrame and save it as a JPG.

         The image is written to ``<path_to_save>/<title>.jpg``, named after
         the ``title`` argument.

         Args:
             df: DataFrame with ``label``, ``count`` and ``index`` columns,
                 as returned by ``create_df_counts``.
             titley: Label of the y axis.
             titlex: Label of the x axis.
             title: Title of the chart and base name of the saved file.
         """
         plt.figure(figsize=(10, 6))
         ax = sns.barplot(x="index", y="count", data=df, order=df['index'])
         ax.set_title(title, fontdict=self.fontTitle)
         ax.set_xlabel(titlex, fontdict=self.fontX)
         ax.set_ylabel(titley, fontdict=self.fontY)

         # y axis values font size
         ax.tick_params(axis='y', labelsize=20)

         # Grid
         ax.grid(linestyle='--', linewidth=1)

         # Write each count on top of its bar.
         for rect, label in zip(ax.patches, df["count"]):
             ax.text(rect.get_x() + rect.get_width() / 2, rect.get_height(), label,
                     ha='center', va='bottom', size=15)

         ax.set_xticklabels(df['label'], rotation='vertical', fontsize=20)

         plt.savefig(os.path.join(self.path_to_save, title + ".jpg"),
                     format="jpg", bbox_inches='tight')

     def create_sorted_counts(self, user_names, top_n=25):
         """
         Count the values and return the printed top ``top_n`` ranking.

         Args:
             user_names: Iterable of hashable values to count.
             top_n: Maximum number of entries to keep.

         Returns:
             The list produced by ``get_sorted_tuits``.
         """
         counts = self.get_counts(user_names)
         return self.get_sorted_tuits(counts, top_n=top_n)

     def get_counts(self, user_names):
         """
         Count how many times each value occurs.

         The input is traversed once, so one-shot iterators are counted
         correctly.

         Args:
             user_names: Iterable of hashable values.

         Returns:
             Dict mapping each value to its number of occurrences, in order
             of first appearance.
         """
         return dict(Counter(user_names))

     def create_top_N_users(self, sorted_tuits, n=21):
         """
         Build the counts DataFrame and keep its first ``n`` rows.

         Args:
             sorted_tuits: Sequence of ``(value, count)`` tuples.
             n: Number of rows to keep.

         Returns:
             The first ``n`` rows of ``create_df_counts(sorted_tuits)``.
         """
         df_x = self.create_df_counts(sorted_tuits)
         return df_x.iloc[0:n]

     def main(self, df, top_n=21, target_column="Departamento",
              name="Departamento",
              value="Frecuencia",
              title_x="Departamento",
              title_y="Frecuencia",
              title="Acciones por departamento"):
         """
         Count the values of a column, print the ranking and save the chart.

         Intermediate results are kept on the instance as ``uniques``,
         ``user_names``, ``sorted_counts`` and ``top20``.

         Args:
             df: DataFrame containing ``target_column``.
             top_n: Number of most frequent values to plot.
             target_column: Name of the column whose values are counted.
             name: Text printed before each value in the ranking.
             value: Text printed before each count in the ranking.
             title_x: Label of the x axis.
             title_y: Label of the y axis.
             title: Title of the chart and base name of the saved file.
         """
         # Set some parameters for the plot
         self.target_column = target_column
         self.key_name = name
         self.key_value = value
         self.title_x = title_x
         self.title_y = title_y
         self.title = title

         self.uniques, self.user_names = self.get_uniques_and_user_names(df)
         self.sorted_counts = self.create_sorted_counts(self.user_names, top_n=top_n)
         self.top20 = self.create_top_N_users(self.sorted_counts, n=top_n)

         self.plot_word_count(self.top20,
                              titley=self.title_y,
                              titlex=self.title_x,
                              title=self.title)
	import matplotlib.pyplot as plt
	import seaborn as sns

	# Helper class for plotting value frequencies
	class PlotFrecuency:
	    """
	    Plot the top N most frequent values of a DataFrame column as a bar chart.

	    Typical use is ``PlotFrecuency(path).main(df, target_column=...)``, which
	    counts the values of the column, prints the ranking and saves the chart
	    as a JPG inside ``path_to_save``.
	    """

	    def __init__(self, path_to_save):
	        """
	        Store the output directory and set up plot fonts and defaults.

	        Args:
	            path_to_save: Directory in which ``plot_word_count`` writes the
	                ``<title>.jpg`` image.
	        """
	        self.path_to_save = path_to_save

	        self.fontTitle = {
	            'weight': 'bold',
	            'size': 20,
	        }
	        self.fontY = {
	            'weight': 'bold',
	            'size': 15,
	        }
	        # Original (misspelled) attribute name, kept as an alias of fontY so
	        # code that reads or tweaks it keeps working.
	        self.fonntY = self.fontY
	        self.fontX = {
	            'weight': 'bold',
	            'size': 15,
	        }
	        self.top20 = None

	        # Same defaults as main(), so the helper methods can be called on
	        # their own without raising AttributeError.
	        self.target_column = "Departamento"
	        self.key_name = "Departamento"
	        self.key_value = "Frecuencia"
	        self.title_x = "Departamento"
	        self.title_y = "Frecuencia"
	        self.title = "Acciones por departamento"

	    def get_sorted_tuits(self, counts, top_n=25):
	        """
	        Rank the counted values, print the ranking and return it.

	        Args:
	            counts: Mapping of value -> number of occurrences.
	            top_n: Maximum number of entries to keep.

	        Returns:
	            List of ``(value, count)`` tuples ordered by count and then by
	            value, both descending, truncated to ``top_n`` entries.
	        """
	        sorted_twitts = sorted(
	            counts.items(),
	            key=lambda kv: (kv[1], kv[0]),
	            reverse=True,
	        )[:top_n]

	        for i, (k, v) in enumerate(sorted_twitts):
	            print(str(i) + "-", f"{self.key_name}: ", k, "   |||", f"{self.key_value}: ", v)
	        return sorted_twitts

	    def create_df_counts(self, sorted_twitts):
	        """
	        Build a DataFrame from ``(value, count)`` pairs.

	        Args:
	            sorted_twitts: Sequence of ``(value, count)`` tuples.

	        Returns:
	            DataFrame indexed by the values, with columns ``label`` (the
	            value), ``count`` and ``index`` (position of the pair in the
	            input), sorted by ``count`` descending.
	        """
	        df_x = pd.DataFrame(sorted_twitts, columns=["label", "count"])
	        df_x["index"] = df_x.index
	        df_x.index = list(df_x["label"])

	        # A stable sort keeps the incoming order of rows with equal counts.
	        return df_x.sort_values(['count'], ascending=False, kind="mergesort")

	    def get_uniques_and_user_names(self, df):
	        """
	        Select the target column and compute its distinct values.

	        Reads ``self.target_column`` and stores the distinct values in
	        ``self.uniques``.

	        Args:
	            df: DataFrame containing the column ``self.target_column``.

	        Returns:
	            Tuple ``(uniques, column)`` where ``column`` is
	            ``df[self.target_column]``.
	        """
	        user_names = df[self.target_column]
	        self.uniques = user_names.unique()
	        return self.uniques, user_names

	    def plot_word_count(self, df, titley='Number of Comments', titlex="Names", title="Message frec"):
	        """
	        Draw the bar chart for a counts DataFrame and save it as a JPG.

	        The image is written to ``<path_to_save>/<title>.jpg``, named after
	        the ``title`` argument.

	        Args:
	            df: DataFrame with ``label``, ``count`` and ``index`` columns,
	                as returned by ``create_df_counts``.
	            titley: Label of the y axis.
	            titlex: Label of the x axis.
	            title: Title of the chart and base name of the saved file.
	        """
	        plt.figure(figsize=(10, 6))
	        ax = sns.barplot(x="index", y="count", data=df, order=df['index'])
	        ax.set_title(title, fontdict=self.fontTitle)
	        ax.set_xlabel(titlex, fontdict=self.fontX)
	        ax.set_ylabel(titley, fontdict=self.fontY)

	        # y axis values font size
	        ax.tick_params(axis='y', labelsize=20)

	        # Grid
	        ax.grid(linestyle='--', linewidth=1)

	        # Write each count on top of its bar.
	        for rect, label in zip(ax.patches, df["count"]):
	            ax.text(rect.get_x() + rect.get_width() / 2, rect.get_height(), label,
	                    ha='center', va='bottom', size=15)

	        ax.set_xticklabels(df['label'], rotation='vertical', fontsize=20)

	        plt.savefig(os.path.join(self.path_to_save, title + ".jpg"),
	                    format="jpg", bbox_inches='tight')

	    def create_sorted_counts(self, user_names, top_n=25):
	        """
	        Count the values and return the printed top ``top_n`` ranking.

	        Args:
	            user_names: Iterable of hashable values to count.
	            top_n: Maximum number of entries to keep.

	        Returns:
	            The list produced by ``get_sorted_tuits``.
	        """
	        counts = self.get_counts(user_names)
	        return self.get_sorted_tuits(counts, top_n=top_n)

	    def get_counts(self, user_names):
	        """
	        Count how many times each value occurs.

	        The input is traversed once, so one-shot iterators are counted
	        correctly.

	        Args:
	            user_names: Iterable of hashable values.

	        Returns:
	            Dict mapping each value to its number of occurrences, in order
	            of first appearance.
	        """
	        return dict(Counter(user_names))

	    def create_top_N_users(self, sorted_tuits, n=21):
	        """
	        Build the counts DataFrame and keep its first ``n`` rows.

	        Args:
	            sorted_tuits: Sequence of ``(value, count)`` tuples.
	            n: Number of rows to keep.

	        Returns:
	            The first ``n`` rows of ``create_df_counts(sorted_tuits)``.
	        """
	        df_x = self.create_df_counts(sorted_tuits)
	        return df_x.iloc[0:n]

	    def main(self, df, top_n=21, target_column="Departamento",
	             name="Departamento",
	             value="Frecuencia",
	             title_x="Departamento",
	             title_y="Frecuencia",
	             title="Acciones por departamento"):
	        """
	        Count the values of a column, print the ranking and save the chart.

	        Intermediate results are kept on the instance as ``uniques``,
	        ``user_names``, ``sorted_counts`` and ``top20``.

	        Args:
	            df: DataFrame containing ``target_column``.
	            top_n: Number of most frequent values to plot.
	            target_column: Name of the column whose values are counted.
	            name: Text printed before each value in the ranking.
	            value: Text printed before each count in the ranking.
	            title_x: Label of the x axis.
	            title_y: Label of the y axis.
	            title: Title of the chart and base name of the saved file.
	        """
	        # Set some parameters for the plot
	        self.target_column = target_column
	        self.key_name = name
	        self.key_value = value
	        self.title_x = title_x
	        self.title_y = title_y
	        self.title = title

	        self.uniques, self.user_names = self.get_uniques_and_user_names(df)
	        self.sorted_counts = self.create_sorted_counts(self.user_names, top_n=top_n)
	        self.top20 = self.create_top_N_users(self.sorted_counts, n=top_n)

	        self.plot_word_count(self.top20,
	                             titley=self.title_y,
	                             titlex=self.title_x,
	                             title=self.title)