Skip to content

Instantly share code, notes, and snippets.

@nkt1546789
Created July 9, 2015 17:07
Show Gist options
  • Select an option

  • Save nkt1546789/1a67b77ba8cffec06107 to your computer and use it in GitHub Desktop.

Select an option

Save nkt1546789/1a67b77ba8cffec06107 to your computer and use it in GitHub Desktop.
Visualization of the file types on your computer.
import os,re
from os import path
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Plot styling: white background with the larger "talk" font scale.
sns.set(context="talk", style="white")

# Root directory where the scan starts; edit this before running.
HOMEPATH = "/path/to/your/home_directory"

# search_files() stops descending once its depth reaches this value
# (the initial call uses depth 0).
MAX_DEPTH = 5

# Captures the run of ASCII letters that follows the last dot of a path.
# Extensions containing digits or other characters (e.g. "mp3") do not match.
FILETYPE_DETECTOR = re.compile(r".+\.([a-zA-Z]+)$")

# Minimum number of files an extension needs in order to be charted.
THRESHOLD = 100

# Filled by search_files() with the path of every file it finds.
filenames = []
def search_files(current_path, depth, max_depth=None, found=None):
    """Recursively collect the paths of regular files below ``current_path``.

    Args:
        current_path: Directory to list. May be absolute or relative.
        depth: Nesting level of ``current_path``; the initial call passes 0.
        max_depth: Levels at or beyond this value are not listed. Defaults
            to the module-level ``MAX_DEPTH``.
        found: List that receives the file paths. Defaults to the
            module-level ``filenames`` list.

    Returns:
        None. Results are appended to ``found``.

    Paths that cannot be listed (``os.listdir`` raising ``OSError``) are
    skipped silently so that one unreadable entry does not abort the scan.
    """
    # Resolve the defaults at call time so the module globals are only
    # needed when the caller does not supply its own values.
    if max_depth is None:
        max_depth = MAX_DEPTH
    if found is None:
        found = filenames

    if depth >= max_depth:
        return

    try:
        entries = os.listdir(current_path)
    except OSError:
        return

    for entry in entries:
        # Join exactly once. Previously the already-joined path was joined
        # onto current_path a second time when recursing, which duplicated
        # the prefix for relative roots ("a" + "a/b" -> "a/a/b") and made
        # every subdirectory of a relative root unreachable.
        entry_path = path.join(current_path, entry)
        if path.isfile(entry_path):
            found.append(entry_path)
        else:
            # Anything that is not a regular file is tried as a directory;
            # if it is not one, os.listdir raises OSError and it is skipped.
            search_files(entry_path, depth + 1, max_depth, found)
# Walk the home directory; search_files() fills the global `filenames` list.
search_files(HOMEPATH, 0)
# print(...) with %-formatting emits the same text on Python 2 and Python 3,
# whereas the former `print "label:", value` statement is a syntax error on 3.
print("n_files: %d" % len(filenames))

# `data` maps an extension to the number of files carrying it (stored as a
# float); `filetypes` holds one entry per file whose extension was recognized.
data = {}
filetypes = []
for filename in filenames:
    matcher = FILETYPE_DETECTOR.match(filename)
    if matcher:
        filetype = matcher.group(1)
        filetypes.append(filetype)
        data[filetype] = data.get(filetype, 0.) + 1
print("n_regular_files: %d" % len(filetypes))
print("n_filetypes: %d" % len(set(filetypes)))

# Only extensions with at least THRESHOLD files are charted.
X = [key for key in data if data[key] >= THRESHOLD]
Y = [data[key] for key in X]
df = pd.DataFrame({"Filetype": X, "Value": Y})
if X:
    df.plot("Filetype", "Value", kind="barh", color=sns.color_palette("deep", 3), legend=False)
    plt.tight_layout(h_pad=3)
    plt.show()
else:
    # An empty frame gives pandas nothing numeric to draw, so report the
    # situation instead of letting the plotting call fail.
    print("no filetype has at least %d files; nothing to plot" % THRESHOLD)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment