Skip to content

Instantly share code, notes, and snippets.

@qixiaobo
Created August 17, 2018 08:59
Show Gist options
  • Save qixiaobo/8098babdaac6ab05903d0b662b03c481 to your computer and use it in GitHub Desktop.
Save qixiaobo/8098babdaac6ab05903d0b662b03c481 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Function
# Find duplicate classes among Java libs.
#
# @Usage
# $ show-duplicate-java-classes # find jars from current dir
# $ show-duplicate-java-classes path/to/lib_dir1 /path/to/lib_dir2
# $ show-duplicate-java-classes -c path/to/class_dir1 -c /path/to/class_dir2
#
# @online-doc https://github.com/oldratlee/useful-scripts/blob/master/docs/java.md#-show-duplicate-java-classes
# @author tg123 (farmer1992 at gmail dot com)
# @author Jerry Lee (oldratlee at gmail dot com)
__author__ = 'tg123'
from glob import glob
from os import walk
from zipfile import ZipFile
from os.path import relpath, isdir
from optparse import OptionParser
def list_jar_file_under_lib_dirs(libs):
    """Collect the jar files named by, or directly contained in, *libs*.

    Each entry of *libs* that is an existing directory contributes the
    ``*.jar`` files found directly inside it (sub-directories are not
    searched). Any other entry is assumed to be a jar file path and is
    added unchanged, without checking that it exists.

    :param libs: iterable of directory paths and/or jar file paths
    :return: set of jar file paths
    """
    # Local import: only ``glob`` itself is imported at the top of the file.
    from glob import escape

    jar_files = set()
    for lib in libs:
        if isdir(lib):
            # Escape the directory part so glob metacharacters in its name
            # (e.g. "lib[1]") are matched literally instead of being read
            # as a pattern.
            jar_files.update(glob(escape(lib) + '/*.jar'))
        else:
            jar_files.add(lib)
    return jar_files
def list_class_under_jar_file(jar_file):
    """Return the names of all ``.class`` entries inside *jar_file*.

    The suffix check is case-insensitive. Names are returned exactly as
    stored in the archive.

    :param jar_file: path of a jar (zip) archive
    :return: set of archive entry names ending in ``.class``
    """
    # Open via a context manager so the archive handle is always closed.
    with ZipFile(jar_file) as archive:
        return {name for name in archive.namelist()
                if name.lower().endswith('.class')}
def list_class_under_class_dir(class_dir):
    """Return the ``.class`` files found anywhere below *class_dir*.

    The directory tree is walked recursively and the suffix check is
    case-insensitive. Each result is the file's path relative to
    *class_dir*, written with ``/`` as the separator so that it compares
    equal to the entry name of the same class inside a jar archive.

    :param class_dir: root directory of compiled classes
    :return: set of ``/``-separated relative paths of ``.class`` files
    """
    # Local import: the top of the file only imports relpath and isdir.
    from os.path import join, sep

    class_files = set()
    for dir_path, _, file_names in walk(class_dir):
        for filename in file_names:
            if not filename.lower().endswith('.class'):
                continue
            relative = relpath(join(dir_path, filename), class_dir)
            # Zip entry names always use "/"; normalise the OS separator
            # so directory and jar classes can be matched on every platform.
            class_files.add(relative.replace(sep, '/'))
    return class_files
def expand_2_class_path(jar_files, class_dirs):
    """Map every class file to the class paths (jars / dirs) that contain it.

    :param jar_files: iterable of jar file paths
    :param class_dirs: iterable of class directory paths
    :return: a 2-tuple ``(java_class_2_class_paths, class_paths)`` where the
        first item is a dict from class file name to the set of jar files
        and class dirs providing it, and the second item is the set of all
        jar files and class dirs that were examined
    """
    # Materialise both inputs once so that any iterable (list, tuple, set,
    # generator) is accepted and can be reused for the returned union.
    jar_files = list(jar_files)
    class_dirs = list(class_dirs)

    java_class_2_class_paths = {}
    # list all classes in jar files
    for jar_file in jar_files:
        for class_file in list_class_under_jar_file(jar_file):
            java_class_2_class_paths.setdefault(class_file, set()).add(jar_file)
    # list all classes in class dir
    for class_dir in class_dirs:
        for class_file in list_class_under_class_dir(class_dir):
            java_class_2_class_paths.setdefault(class_file, set()).add(class_dir)
    return java_class_2_class_paths, set(jar_files) | set(class_dirs)
def find_duplicate_classes(java_class_2_class_paths):
    """Group the classes that occur in more than one class path.

    :param java_class_2_class_paths: dict mapping a class file name to the
        collection of class paths (jars / dirs) that contain it
    :return: dict mapping a ``frozenset`` of class paths to the set of class
        files found in every member of that frozenset; classes that occur
        in a single class path only are omitted
    """
    class_path_2_duplicate_classes = {}
    # The input dict is not modified here, so it is iterated directly
    # without first copying its items into a list.
    for java_class, class_paths in java_class_2_class_paths.items():
        if len(class_paths) > 1:
            key = frozenset(class_paths)
            class_path_2_duplicate_classes.setdefault(key, set()).add(java_class)
    return class_path_2_duplicate_classes
def print_class_paths(class_paths):
    """Print a numbered listing of the given class paths to stdout.

    The listing is preceded by a blank line and a banner; entries are
    numbered from 1 in the iteration order of *class_paths*.

    :param class_paths: iterable of class path entries (jars / dirs)
    """
    rule = "=" * 80
    print()
    print(rule)
    print("class paths to find:")
    print(rule)
    for number, entry in enumerate(class_paths, 1):
        print("%-3d: %s" % (number, entry))
if __name__ == '__main__':
    # Command line: any number of "-c class-dir" options, followed by
    # positional lib directories and/or jar files.
    optionParser = OptionParser('usage: %prog '
                                '[-c class-dir1 [-c class-dir2] ...] '
                                '[lib-dir1|jar-file1 [lib-dir2|jar-file2] ...]')
    optionParser.add_option("-c", "--class-dir", dest="class_dirs", default=[], action="append", help="add class dir")
    options, libs = optionParser.parse_args()

    # With no arguments at all, look for jars in the current directory.
    if not options.class_dirs and not libs:
        libs = ['.']

    java_class_2_class_paths, class_paths = expand_2_class_path(
        list_jar_file_under_lib_dirs(libs), options.class_dirs)
    class_path_2_duplicate_classes = find_duplicate_classes(java_class_2_class_paths)

    if not class_path_2_duplicate_classes:
        print("COOL! No duplicate classes found!")
        print_class_paths(class_paths)
        # Exit status 0. SystemExit is raised directly because the exit()
        # helper is injected by the site module for interactive use and is
        # not guaranteed to exist (e.g. under "python -S").
        raise SystemExit

    # Summary: one line per group of class paths sharing duplicate classes,
    # formatted as "index(duplicate class count@class path count): paths".
    print("Found duplicate classes in below class path:")
    for idx, jars in enumerate(class_path_2_duplicate_classes):
        print("%-3d(%d@%d): %s" % (idx + 1, len(class_path_2_duplicate_classes[jars]), len(jars), " ".join(jars)))

    # Detail: the same group header followed by each duplicated class.
    print()
    print("=" * 80)
    print("Duplicate classes detail info:")
    print("=" * 80)
    for idx, (jars, classes) in enumerate(class_path_2_duplicate_classes.items()):
        print("%-3d(%d@%d): %s" % (idx + 1, len(class_path_2_duplicate_classes[jars]), len(jars), " ".join(jars)))
        for i, c in enumerate(classes):
            print("\t%-3d %s" % (i + 1, c))

    print_class_paths(class_paths)
    # Non-zero exit status signals that duplicates were found.
    raise SystemExit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment