Last active
November 15, 2023 15:32
-
-
Save relipse/0fec8412b61dbe2aa40d7639c3432136 to your computer and use it in GitHub Desktop.
List files and some statistics in directory for huge directories.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * dirlist
 * A program that lists the files in a directory, with some statistics, as fast as possible.
 * Useful for directories with 500,000 files.
 *
 * @date 2023-10-23
 * @author relipse
 */
#include <dirent.h>
#include <locale.h>
#include <stdio.h>
#include <sys/stat.h>
#include <time.h>

#include <cerrno>
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>

using namespace std;
/**
 * Write the usage banner, the option summary and two sample runs to stdout.
 *
 * @param argc Argument count; not consulted.
 * @param argv Argument vector; argv[0] is echoed as the program name.
 */
void printHelp(int argc, char *argv[]){
    (void)argc;

    // Only the banner needs formatting; everything after it is fixed text.
    printf("%s [Options] [directory]\n", argv[0]);

    static const char *const kHelpChunks[] = {
        "List number of files and each file (if -f,--hide-files is not set)\n",
        "in a directory as fast as possible,\n",
        "also list alphabetical min and max (by filename)\n",
        "\nOptions:\n",
        "-f,--hide-files Do not show each file\n",
        "-d,--dots Use 1 period (dot) to represent each file\n",
        "-s,--file-size Show file sizes and min/max (much slower)\n",
        "-c,--created Show created/modified dates and min/max\n",
        "-g,--grep <match> Only show file names matching <match>\n",
        "-h,--help Show this page\n",
        "\nExamples:\n",
        // Sample run: default listing.
        "# ./dirlist\n",
        "1. dirlist\n",
        "2. dirlist.cpp\n",
        "3. Makefile",
        "\n",
        "Directory: . contains 3 files\n",
        "Alphabetical Max: dirlist.cpp\n",
        "Alphabetical Min: Makefile\n",
        // Sample run: one dot per file.
        "\n# ./dirlist -d\n",
        "...\n",
        "Directory: . contains 3 files\n",
        "Alphabetical Max: dirlist.cpp\n",
        "Alphabetical Min: Makefile\n"
    };

    const unsigned int chunkCount = sizeof(kHelpChunks) / sizeof(kHelpChunks[0]);
    for (unsigned int k = 0; k < chunkCount; ++k){
        fputs(kHelpChunks[k], stdout);
    }
}
/**
 * Print a heading followed by one `"extension": count` line per map entry,
 * in the map's key order, to std::cout.
 *
 * The map is only read, so it is taken by const reference; callers holding a
 * non-const map are unaffected.
 *
 * @param m Extension (text after the last '.', possibly empty) mapped to the
 *          number of files that carried it.
 */
void printExtensionMap(const std::map<std::string, unsigned long>& m){
    typedef std::map<std::string, unsigned long>::const_iterator ExtIter;

    std::cout << "Extensions List (extension: count):" << '\n';
    for (ExtIter it = m.begin(); it != m.end(); ++it){
        std::cout << '"' << it->first << "\": " << it->second << '\n';
    }
    // Flush once at the end instead of once per line.
    std::cout.flush();
}
int main(int argc, char *argv[]) { | |
DIR *dir; | |
struct dirent *ent; | |
unsigned long count = 0; | |
string theDir = "."; | |
string max = ""; | |
string min = ""; | |
string file; | |
string fileTypes = ""; | |
bool printAllFiles = true; | |
bool printDots = false; | |
bool printSize = false; | |
unsigned long fileSizeSum = 0; | |
unsigned long maxSize = 0; | |
unsigned long minSize = 0; | |
unsigned long fileSize = 0; | |
string maxSizeFileName = ""; | |
string minSizeFileName = ""; | |
string grep = ""; | |
std::vector<string> aryGrep; | |
std::map<string,unsigned long> foundExts; | |
setlocale(LC_NUMERIC, ""); | |
bool printMoreFileStats = false; | |
string earliestCreatedDate = ""; | |
string latestCreatedDate = ""; | |
string earliestCreatedFile = ""; | |
string latestCreatedFile = ""; | |
if (argc > 1){ | |
string arg; | |
for (int i = 1; i < argc; ++i){ | |
arg = argv[i]; | |
if (arg == "--hide-files" || arg == "-f"){ | |
printAllFiles = false; | |
}else if (arg == "-c" || arg == "--created"){ | |
printMoreFileStats = true; | |
}else if (arg == "-s" || arg == "--file-size"){ | |
printSize = true; | |
}else if (arg == "-h" || arg == "--help"){ | |
printHelp(argc, argv); | |
return 0; | |
}else if (arg == "--dots" || arg == "-d"){ | |
printDots = true; | |
printAllFiles = false; | |
}else if (arg == "--grep" || arg == "-g"){ | |
if (i+1 >= argc){ | |
cout << "--grep|-g needs next argument (missing)." << endl; | |
printHelp(argc, argv); | |
return 0; | |
}else{ | |
grep = argv[i+1]; | |
aryGrep.push_back(grep); | |
i++; | |
} | |
}else{ | |
theDir = arg; | |
if (theDir == ""){ | |
theDir = "."; | |
} | |
} | |
} | |
} | |
if (aryGrep.size() > 0){ | |
cout << "Grep on, -g,--grep matching all terms: "; | |
for(unsigned int i = 0; i < aryGrep.size(); ++i){ | |
if (i > 0){ cout << ", "; } | |
cout << aryGrep[i]; | |
} | |
cout << endl; | |
} | |
dir = opendir(theDir.c_str()); | |
while((ent = readdir(dir))){ | |
file = ent->d_name; | |
std::string::size_type idx; | |
idx = file.rfind('.'); | |
if (file == ".." || file == "."){ | |
continue; | |
} | |
if (aryGrep.size() > 0){ | |
unsigned int matches = 0; | |
for (unsigned int i = 0; i < aryGrep.size(); ++i){ | |
grep = aryGrep[i]; | |
if (file.find(grep) != string::npos){ | |
matches++; | |
} | |
} | |
//all greps must match or we go to next file | |
if (matches != aryGrep.size()){ | |
//no grep matches found, go to next file | |
continue; | |
} | |
} | |
string extension = ""; | |
if(idx != std::string::npos) | |
{ | |
extension = file.substr(idx+1); | |
} | |
if (!foundExts.count(extension)){ | |
foundExts[extension] = 0; | |
} | |
foundExts[extension]++; | |
++count; | |
if (max == "" || ent->d_name > max){ | |
max = ent->d_name; | |
} | |
if (min == "" || ent->d_name < min){ | |
min = ent->d_name; | |
} | |
if (printAllFiles){ | |
printf("%'lu. %s", count, ent->d_name); | |
if (printMoreFileStats){ | |
string modtime = ""; | |
string createdtime = ""; | |
struct stat t_stat; | |
stat(ent->d_name, &t_stat); | |
struct tm * timeinfo = localtime(&t_stat.st_ctime); | |
struct tm * timemod = localtime(&(t_stat.st_mtime)); | |
char buffer [80]; | |
strftime(buffer, 80, "%Y-%m-%d %H:%M:%S",timeinfo); | |
string ct = buffer; | |
strftime(buffer, 80, "%Y-%m-%d %H:%M:%S",timemod); | |
string mt = buffer; | |
if (earliestCreatedDate == "" || ct < earliestCreatedDate){ | |
earliestCreatedDate = ct; | |
earliestCreatedFile = ent->d_name; | |
} | |
if (latestCreatedDate == "" || ct > latestCreatedDate){ | |
latestCreatedDate = ct; | |
latestCreatedFile = ent->d_name; | |
} | |
createdtime = asctime(timeinfo); | |
createdtime.erase (strcspn (createdtime.c_str(), "\r\n")); | |
modtime = asctime(timemod); | |
modtime.erase(strcspn(modtime.c_str(), "\r\n")); | |
if (createdtime == modtime){ | |
cout << " (created/modified: " << createdtime; | |
}else{ | |
cout << " (created: " << createdtime << ", modified: " << modtime; | |
} | |
cout << ")"; | |
} | |
if (printSize){ // -s, --file-size | |
ifstream in_file(ent->d_name, ios::binary); | |
in_file.seekg(0, ios::end); | |
fileSize = (unsigned long) in_file.tellg(); | |
fileSizeSum += fileSize; | |
if (fileSize > maxSize){ | |
maxSize = fileSize; | |
maxSizeFileName = ent->d_name; | |
} | |
if (minSize == 0 || fileSize < minSize){ | |
minSize = fileSize; | |
minSizeFileName = ent->d_name; | |
} | |
printf(" %'lu bytes", fileSize); | |
} | |
printf("\n"); | |
} | |
if (printDots){ | |
printf("."); | |
} | |
} | |
closedir(dir); | |
if (printDots){ | |
printf("\n"); | |
} | |
if (aryGrep.size() > 0){ | |
cout << "Directory " << theDir << " contains " << count << " matching files." << endl; | |
}else{ | |
printf("Directory: %s contains %'lu files\n", theDir.c_str(), count); | |
} | |
printf("Alphabetical Max: %s\n", max.c_str()); | |
printf("Alphabetical Min: %s\n", min.c_str()); | |
if (printSize){ | |
printf("Largest File: %s %'lu bytes\n", maxSizeFileName.c_str(), maxSize); | |
printf("Smallest File: %s %'lu bytes\n", minSizeFileName.c_str(), minSize); | |
printf("Total File Size Summation: %'lu bytes\n", fileSizeSum); | |
} | |
if (printMoreFileStats){ | |
cout << "Earliest File: " << earliestCreatedFile << " (created: " << earliestCreatedDate << ")" << endl; | |
cout << "Latest File: " << latestCreatedFile << " (created: " << latestCreatedDate << ")" << endl; | |
} | |
printExtensionMap(foundExts); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment