ghl3 · December 14, 2015 18:09
diff --git a/pandas_plotting.py b/pandas_plotting.py
 def scatter_plots(df, class_title, feature_names=None, cmap=None):
    
    # Create a pandas group view for each class
    class_names = list(set(df[class_title]))
    groups = [df[(df[class_title] == name)] for name in class_names]

    if feature_names==None:
        feature_names = [col for col in df.columns if col != class_title]
            
    NUM_COLORS = len(class_names)
    if cmap==None:
        cmap = plt.get_cmap('gist_rainbow')
    colors = [cmap(1.*i/NUM_COLORS) for i in range(NUM_COLORS)]
    
    def pair_scatter(colA, colB):
        for group, name, color in zip(groups, class_names, colors):
            plt.scatter(group[colA], group[colB], marker="o", color=color, label=name)   
        plt.legend()
        plt.xlabel(colA)
        plt.ylabel(colB)
    
    features = df[feature_names]

    import itertools
    combos = list(itertools.combinations(features, 2))
    nrows = math.ceil(math.sqrt(len(combos)))
    for idx, (colA, colB) in enumerate(combos):
        plt.subplot(nrows, nrows, idx+1)
        pair_scatter(colA, colB)


 def feature_histograms(df, class_title, feature_names=None, nbins=30, cmap=None):

    class_names = list(set(df[class_title]))
    groups = [df[(df[class_title] == name)] for name in class_names]

    NUM_COLORS = len(class_names)
    if cmap==None:
        cmap = plt.get_cmap('gist_rainbow')
    colors = [cmap(1.*i/NUM_COLORS) for i in range(NUM_COLORS)]
    
    if feature_names==None:
        feature_names = [col for col in df.columns if col != class_title]
    
    def feature_hist(feature):
    
        bin_range = min(df[feature]), max(df[feature])
        for group, class_name, color in zip(groups, class_names, colors):
            plt.hist(group[feature], color=color, label=class_name, bins=nbins,
                     range=bin_range)  
        plt.legend()
        plt.xlabel(feature)

    import math
    nrows = math.ceil(math.sqrt(len(feature_names)))

    for idx, feature in enumerate(feature_names):
        plt.subplot(nrows, nrows, idx+1)
        feature_hist(feature)
	def scatter_plots(df, class_title, feature_names=None, cmap=None):

	# Create a pandas group view for each class
	class_names = list(set(df[class_title]))
	groups = [df[(df[class_title] == name)] for name in class_names]

	if feature_names==None:
	feature_names = [col for col in df.columns if col != class_title]

	NUM_COLORS = len(class_names)
	if cmap==None:
	cmap = plt.get_cmap('gist_rainbow')
	colors = [cmap(1.*i/NUM_COLORS) for i in range(NUM_COLORS)]

	def pair_scatter(colA, colB):
	for group, name, color in zip(groups, class_names, colors):
	plt.scatter(group[colA], group[colB], marker="o", color=color, label=name)
	plt.legend()
	plt.xlabel(colA)
	plt.ylabel(colB)

	features = df[feature_names]

	import itertools
	combos = list(itertools.combinations(features, 2))
	nrows = math.ceil(math.sqrt(len(combos)))
	for idx, (colA, colB) in enumerate(combos):
	plt.subplot(nrows, nrows, idx+1)
	pair_scatter(colA, colB)


	def feature_histograms(df, class_title, feature_names=None, nbins=30, cmap=None):

	class_names = list(set(df[class_title]))
	groups = [df[(df[class_title] == name)] for name in class_names]

	NUM_COLORS = len(class_names)
	if cmap==None:
	cmap = plt.get_cmap('gist_rainbow')
	colors = [cmap(1.*i/NUM_COLORS) for i in range(NUM_COLORS)]

	if feature_names==None:
	feature_names = [col for col in df.columns if col != class_title]

	def feature_hist(feature):

	bin_range = min(df[feature]), max(df[feature])
	for group, class_name, color in zip(groups, class_names, colors):
	plt.hist(group[feature], color=color, label=class_name, bins=nbins,
	range=bin_range)
	plt.legend()
	plt.xlabel(feature)

	import math
	nrows = math.ceil(math.sqrt(len(feature_names)))

	for idx, feature in enumerate(feature_names):
	plt.subplot(nrows, nrows, idx+1)
	feature_hist(feature)
No results found