Skip to content

Instantly share code, notes, and snippets.

@JazzyJes
Created June 17, 2015 14:42
Show Gist options
  • Save JazzyJes/7d43cd8f2b4aafb6f0fa to your computer and use it in GitHub Desktop.
Save JazzyJes/7d43cd8f2b4aafb6f0fa to your computer and use it in GitHub Desktop.
Example of how to get the inode of a file and use it to find out whether Logstash has already processed that file, implemented as a Spring Integration filter.
package stasher.filters;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.integration.file.filters.AbstractFileListFilter;
import org.springframework.stereotype.Component;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.HashMap;
import java.util.Map;
/**
 * File-list filter that accepts a file only when the newest {@code .sincedb_*} file
 * records it as completely scanned.
 *
 * <p>A file is looked up by its inode in the most recently modified sincedb file found
 * in the directory configured through {@code setup.sincedb.path}. It is accepted when
 * the scanned size recorded for that inode equals the file's current length.
 *
 * <p>The sincedb file is re-read on every call to {@link #accept(File)}.
 *
 * Created by cherguij on 19-05-2015.
 * Stasher - stasher
 */
@Component
public class ProcessedFileFilter extends AbstractFileListFilter<File> {

    /** Logger */
    private static final Logger LOGGER = LoggerFactory.getLogger(ProcessedFileFilter.class);

    /** .sincedb column index holding the inode. */
    private static final int INODE_COLUMN_INDEX = 0;

    /** .sincedb column index holding the number of bytes already scanned. */
    private static final int SCANNED_SIZE_COLUMN_INDEX = 3;

    /** Value returned by {@link #getInode(File)} when the inode cannot be determined. */
    private static final long UNKNOWN_INODE = -1L;

    /** Marker preceding the inode number in the textual form of a file key. */
    private static final String INODE_MARKER = "ino=";

    /** Path where sincedb files are. */
    @Value("${setup.sincedb.path}")
    private String pathSinceDb;

    /**
     * Decides whether the given file has been fully processed according to the newest
     * sincedb file.
     *
     * @param file the candidate file
     * @return {@code true} when the sincedb contains the file's inode and the recorded
     *         scanned size equals the file's length; {@code false} otherwise, including
     *         when the inode or the sincedb file cannot be determined
     */
    @Override
    protected boolean accept(File file) {
        long itemInode = getInode(file);
        long itemSize = file.length();
        LOGGER.debug(String.format("Analyzing: %s, inode: %s, size: %s", file.getName(), itemInode, itemSize));

        File sincedbFile = lastModifiedFile(pathSinceDb);
        if (sincedbFile == null) {
            // Without a sincedb there is no evidence that the file was processed.
            return false;
        }
        LOGGER.debug(String.format(".sincedb file: %s", sincedbFile.getName()));

        if (itemInode == UNKNOWN_INODE) {
            LOGGER.debug("Inode of {} is unknown, treating the file as not processed", file.getName());
            return false;
        }

        Map<Long, Long> sincedbMap = readSincedb(sincedbFile);
        Long scannedSize = sincedbMap.get(itemInode);
        if (scannedSize == null) {
            // NOOP not found in sincedbFile
            LOGGER.debug("FALSE: sincedb doesn't contain the log file: " + file.getName());
            return false;
        }

        LOGGER.debug(String.format("Key %s found for file %s", "" + itemInode, file.getName()));
        LOGGER.debug(String.format("SIZE :%s, itemSize: %s", scannedSize, itemSize));
        if (scannedSize.longValue() != itemSize) {
            return false;
        }
        // TODO set in last modification comparison and day to keep?
        // To delete
        LOGGER.debug("TRUE file is processed: " + file.getName());
        return true;
    }

    /**
     * Reads a sincedb file into a map from inode to scanned size.
     *
     * <p>Lines that are too short or contain non-numeric values are skipped with a
     * warning. If reading fails, the entries collected so far are returned.
     *
     * @param sincedbFile the sincedb file to parse; must not be {@code null}
     * @return a map keyed by inode; never {@code null}, possibly empty
     */
    private static Map<Long, Long> readSincedb(final File sincedbFile) {
        Map<Long, Long> scannedSizeByInode = new HashMap<>();
        // try-with-resources so the file handle is released on every call.
        try (Reader in = new FileReader(sincedbFile)) {
            Iterable<CSVRecord> records =
                    CSVFormat.DEFAULT.withDelimiter(' ').withIgnoreEmptyLines().parse(in);
            for (CSVRecord record : records) {
                if (record.size() <= SCANNED_SIZE_COLUMN_INDEX) {
                    LOGGER.warn("Skipping malformed line in {}: {}", sincedbFile.getName(), record);
                    continue;
                }
                try {
                    // Inodes are parsed as long: they do not always fit in an int.
                    scannedSizeByInode.put(
                            Long.parseLong(record.get(INODE_COLUMN_INDEX).trim()),
                            Long.parseLong(record.get(SCANNED_SIZE_COLUMN_INDEX).trim()));
                } catch (NumberFormatException ignored) {
                    // The offending record is logged; the exception adds nothing to it.
                    LOGGER.warn("Skipping non-numeric line in {}: {}", sincedbFile.getName(), record);
                }
            }
        } catch (IOException e) {
            LOGGER.error("Could not read sincedb file " + sincedbFile.getAbsolutePath(), e);
        }
        return scannedSizeByInode;
    }

    /**
     * Gets the UNIX unique handler (inode) for a given file.
     *
     * <p>The inode is extracted from the text between {@code "ino="} and the following
     * {@code ")"} in the string form of the file key.
     *
     * @param file the file to get inode for.
     * @return the inode, or {@code -1} when the attributes cannot be read, no file key
     *         is available, or the file key does not contain a parseable inode
     */
    private static long getInode(final File file) {
        assert (file != null);
        Path path = Paths.get(file.getAbsolutePath());

        final BasicFileAttributes attr;
        try {
            attr = Files.readAttributes(path, BasicFileAttributes.class);
        } catch (IOException e) {
            LOGGER.error("Could not read attributes of " + path, e);
            return UNKNOWN_INODE;
        }

        // fileKey() returns null when the platform or file system has no file key.
        Object fileKey = attr.fileKey();
        if (fileKey == null) {
            LOGGER.warn("No file key available for {}", path);
            return UNKNOWN_INODE;
        }

        String key = fileKey.toString();
        LOGGER.debug("fileKEY: " + key);

        int markerStart = key.indexOf(INODE_MARKER);
        int inodeEnd = markerStart < 0 ? -1 : key.indexOf(')', markerStart);
        if (markerStart < 0 || inodeEnd < 0) {
            LOGGER.warn("File key {} of {} does not contain an inode", key, path);
            return UNKNOWN_INODE;
        }

        try {
            long inode = Long.parseLong(key.substring(markerStart + INODE_MARKER.length(), inodeEnd));
            LOGGER.debug("Inode found: " + inode);
            return inode;
        } catch (NumberFormatException ignored) {
            // The unparseable key is logged; the exception adds nothing to it.
            LOGGER.warn("File key {} of {} holds a non-numeric inode", key, path);
            return UNKNOWN_INODE;
        }
    }

    /**
     * Returns the newest file from inside a given folder.
     *
     * <p>Only regular files whose name contains {@code ".sincedb_"} are considered.
     *
     * @param dir where to look
     * @return the latest file or null if not defined (the folder cannot be listed or
     *         holds no matching file).
     */
    // TODO move sincedb directory to specific one.
    private static File lastModifiedFile(final String dir) {
        assert (dir != null);
        File[] candidates = new File(dir).listFiles(new FileFilter() {
            @Override
            public boolean accept(final File file) {
                return (file.isFile() && file.getName().contains(".sincedb_"));
            }
        });

        // listFiles returns null when dir is not a directory or cannot be read.
        if (candidates == null || candidates.length == 0) {
            LOGGER.warn("No .sincedb files where found in path: " + dir);
            return null;
        }

        File newest = null;
        long newestModified = Long.MIN_VALUE;
        for (File candidate : candidates) {
            long modified = candidate.lastModified();
            if (modified > newestModified) {
                newest = candidate;
                newestModified = modified;
            }
        }
        // todo never return null - throw folder not found, folder empty... etc.
        return newest;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment