Created
March 16, 2014 16:19
-
-
Save dozortsev/9585702 to your computer and use it in GitHub Desktop.
File Scanner
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package taskB; | |
import org.apache.log4j.Logger; | |
import java.io.*; | |
import java.nio.file.Files; | |
import java.util.ArrayList; | |
import java.util.Arrays; | |
import java.util.HashMap; | |
import java.util.Map; | |
public class FileScanner { | |
private final String path; | |
private static final int BUFFER_SIZE_SMALL = 1024; // 1024 byte | |
private static final int BUFFER_SIZE_MEDIUM = 1048576; // 1 mb | |
private static final int BUFFER_SIZE_BIG = 10485760; // 10 mb | |
private static Logger log = Logger.getLogger(FileScanner.class); | |
/* | |
* Data structure where keys is a size of file and | |
* value is list of canonical path to mapFiles the same size | |
*/ | |
private Map<Long, ArrayList<String>> mapFiles; | |
/* | |
* Default constructor | |
*/ | |
public FileScanner(String path) { | |
this.path = path; | |
mapFiles = new HashMap<>(); | |
} | |
/* | |
* Constructor with the specified initial capacity | |
*/ | |
public FileScanner(String path, int capacity) { | |
this.path = path; | |
mapFiles = new HashMap<>(capacity); | |
} | |
/* | |
* Getter for path | |
*/ | |
String getPath() { | |
return path; | |
} | |
private String toCanonicalPath(File file) { | |
try { | |
return file.getCanonicalPath(); | |
} catch (IOException e) { | |
e.printStackTrace(); | |
} | |
return null; | |
} | |
/* | |
* Get an input stream that reads bytes from a file | |
*/ | |
protected InputStream getInputStream(File file) throws FileNotFoundException { | |
return new BufferedInputStream(new FileInputStream(file)); | |
} | |
/* | |
* Define buffer size by file length | |
*/ | |
protected int defineBufferSize(long length) { | |
if (length < BUFFER_SIZE_MEDIUM) // file size less than 1mb | |
return BUFFER_SIZE_SMALL; // 1bt | |
if (length < BUFFER_SIZE_BIG) // file size less than 10mb | |
return BUFFER_SIZE_SMALL * 10; // 10bt | |
if (length < BUFFER_SIZE_BIG * 10) // file size less than 100mb | |
return BUFFER_SIZE_MEDIUM; // 1mb | |
if (length < BUFFER_SIZE_BIG * 100) // file size less than 1gb | |
return BUFFER_SIZE_BIG; // 10mb | |
return BUFFER_SIZE_BIG * 10; // 100mb | |
} | |
/* | |
* Search similar files by length in the directory and subdirectories | |
*/ | |
private void scanner(String path) { | |
File[] subDirs = new File(path).listFiles(new FileFilter() { | |
@Override | |
public boolean accept(final File file) { | |
if (file.isFile() && file.canRead()) { | |
long size = file.length(); // length of the file is a key in map | |
if (mapFiles.containsKey(size)) { | |
mapFiles.get(size).add(toCanonicalPath(file)); | |
} | |
else { | |
mapFiles.put(size, new ArrayList<String>(25) {{ | |
add(toCanonicalPath(file)); | |
}}); | |
} | |
return false; | |
} | |
return file.isDirectory() && file.canRead() && !Files.isSymbolicLink(file.toPath()); | |
} | |
}); | |
for (int i = 0; i < subDirs.length; i++) | |
scanner(toCanonicalPath(subDirs[i])); | |
} | |
/* | |
* Compare binary files | |
*/ | |
protected boolean compareFiles(String path1, String path2) { | |
if (path1.equals(path2)) return false; | |
boolean isSimilar = true; | |
final File f1 = new File(path1), f2 = new File(path2); | |
int size = defineBufferSize(f1.length()); | |
byte[] bytesF1 = new byte[size], bytesF2 = new byte[size]; | |
try (InputStream in1 = getInputStream(f1); InputStream in2 = getInputStream(f2)) { | |
while (in1.read(bytesF1) != -1 && in2.read(bytesF2) != -1) { | |
if (!Arrays.equals(bytesF1, bytesF2)) { | |
isSimilar = false; | |
break; | |
} | |
} | |
} catch (IOException e) { | |
log.error("Error:", e); | |
} | |
return isSimilar; | |
} | |
public void searchFiles() { | |
scanner(path); | |
for (ArrayList<String> paths : mapFiles.values()) { | |
if (paths.size() == 1) continue; | |
for (int i = 0; i < paths.size(); i++) { | |
String path1 = paths.get(i); | |
boolean isFound = false; | |
for (int j = 0; j < paths.size();) { | |
String path2 = paths.get(j); | |
if (compareFiles(path1, path2)) { | |
log.info(path2); | |
isFound = paths.remove(path2); | |
} else { | |
j++; | |
} | |
} | |
if (isFound) log.info(path1 + "\n"); | |
paths.remove(path1); | |
} | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Root logger: level INFO, output sent to the "file" appender configured below
log4j.rootLogger=INFO, file

# Direct log messages to a rolling log file
log4j.appender.file=org.apache.log4j.RollingFileAppender
log4j.appender.file.File=log/files.log
# Roll the file over at 4MB and keep one backup file
log4j.appender.file.MaxFileSize=4MB
log4j.appender.file.MaxBackupIndex=1
# Write only the message text followed by a line separator
log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%m%n
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment