Created
October 23, 2017 04:12
-
-
Save daa233/b8cb379d6110db05dd67a500c90db55f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- encoding=utf-8 -*-
# Originally written for Python 2; this version runs on Python 2.7 and Python 3.
import os
import csv
import sys


def _open_csv_for_writing(path):
    """Open *path* in the mode the csv module expects on this interpreter.

    Python 2's csv writer needs a binary-mode file, while Python 3's needs a
    text-mode file opened with newline='' so the writer controls line endings.
    """
    if sys.version_info[0] >= 3:
        return open(path, 'w', newline='')
    return open(path, 'wb')


def write_file_counts(root, out_path):
    """Write one ``directory,file_count`` CSV row per directory under *root*.

    The count in each row is the number of files directly inside that
    directory (not including its subdirectories). No header row is written.
    Directories are visited bottom-up, so *root* itself is the last row.

    Args:
        root: Directory to walk. A path that does not exist produces an
            empty output file, because os.walk yields nothing for it.
        out_path: Path of the CSV file to create or overwrite.

    Returns:
        A dict mapping each visited directory's absolute path to the number
        of files in it including all of its walked subdirectories.
    """
    # Running total of files per directory, keyed by absolute path.
    totals = {}
    with _open_csv_for_writing(out_path) as out:
        outwriter = csv.writer(out)
        # topdown=False visits leaf directories first, so every walked
        # subdirectory is already in `totals` when its parent is reached.
        for path, dirs, files in os.walk(root, topdown=False):
            files_in_current_directory = len(files)
            files_including_subdirs = files_in_current_directory
            for d in dirs:
                fullpath = os.path.abspath(os.path.join(path, d))
                # Symlinks/junctions are listed in `dirs` but not walked,
                # so they never get an entry in `totals`; report and skip.
                try:
                    files_including_subdirs += totals[fullpath]
                except KeyError as e:
                    print('KeyError: {} may be symlink/junction'.format(e))
            totals[os.path.abspath(path)] = files_including_subdirs
            outwriter.writerow([path, files_in_current_directory])
    return totals


if __name__ == '__main__':
    write_file_counts('FINAL_Plankton_Segments_12082014', 'files_count.txt')
# # convert csv file to list and print | |
# import csv | |
# with open('Subject_Task_Count.csv', 'rb') as f: | |
# reader = csv.reader(f) | |
# my_list = list(reader) | |
import numpy as np


def load_class_counts(counts_file):
    """Read the per-directory file counts from a ``path,count`` CSV file.

    Args:
        counts_file: Path to a comma-separated file whose second column
            holds the file counts. The first column (the directory path)
            is not numeric, so genfromtxt reads it as NaN; it is ignored.

    Returns:
        A float array of shape (rows - 1, 1) holding the counts of every
        row except the last. The last row is dropped because, per the
        original author's note, it is not a class of plankton images
        (presumably the top-level directory row -- confirm against the
        writer of the file).

    Raises:
        ValueError: If the file has no rows with at least two columns.
    """
    # atleast_2d keeps a single-row file two-dimensional; genfromtxt
    # returns a 1-D array in that case and 2-D indexing would fail.
    data = np.atleast_2d(np.genfromtxt(counts_file, delimiter=','))
    if data.shape[1] < 2:
        raise ValueError(
            'expected at least two comma-separated columns in %r' % (counts_file,))
    return data[:-1, [1]]


def imbalance_stats(counts):
    """Return ``(max_num, min_num, imbalance_rate)`` for an array of counts.

    ``imbalance_rate`` is ``max_num / min_num``. A zero minimum yields
    ``inf`` (or ``nan`` when the maximum is also zero) without emitting a
    NumPy runtime warning.

    Raises:
        ValueError: If *counts* is empty.
    """
    counts = np.asarray(counts, dtype=float)
    if counts.size == 0:
        raise ValueError('no class counts to compare')
    max_num = np.max(counts)
    min_num = np.min(counts)
    with np.errstate(divide='ignore', invalid='ignore'):
        imbalance_rate = max_num / min_num
    return max_num, min_num, imbalance_rate


if __name__ == '__main__':
    np.set_printoptions(suppress=True)  # print numbers without scientific notation
    counts = load_class_counts('files_count.txt')
    print(counts)
    max_num, min_num, imbalance_rate = imbalance_stats(counts)
    # Trailing values are the results noted by the original author.
    print("max_num = " + str(max_num))  # 3883
    print("min_num = " + str(min_num))  # 20
    print("imbalance_rate = " + str(imbalance_rate))  # 194.15
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for this script. I would like to know how I can add a condition so that only a specific kind of file is counted (for example, count all files with the .doc extension in a folder and its subfolders).
I have tried, but an error appears: