commonquail · January 12, 2019 18:13
diff --git a/.gitignore b/.gitignore
 *.class
 indiv18.zip
 itcont.txt
 sample.txt
diff --git a/Makefile b/Makefile
 # Name of the Java class (and of its .java source file) to compile and run.
 CLASS_NAME := ReadFileJavaApplicationBufferedReader7

 # Delete a target whose recipe failed part-way (e.g. a truncated sample.txt
 # or a partial download) so a broken file is never treated as up to date.
 .DELETE_ON_ERROR:

 # Compile a single Java source file into its class file.
 %.class: %.java
 	javac $<

 # Run the program against the small sample file.
 .PHONY: test
 test: $(CLASS_NAME).class sample.txt
 	java -Xmx4G $(CLASS_NAME) sample.txt

 # Run the program against the full data file; the file is declared as a
 # prerequisite so it is fetched and extracted first when it is missing.
 .PHONY: run
 run: $(CLASS_NAME).class itcont.txt
 	java -Xmx4G $(CLASS_NAME) itcont.txt

 # Convenience alias: download and extract the full data file.
 .PHONY: download-large-file
 download-large-file: itcont.txt

 indiv18.zip:
 	wget https://www.fec.gov/files/bulk-downloads/2018/indiv18.zip

 # unzip restores the timestamp stored in the archive, which can be older than
 # the zip file itself; touch the result so make does not consider it stale
 # and re-extract on every run. -o overwrites without prompting so the recipe
 # never blocks waiting for input.
 itcont.txt: indiv18.zip
 	unzip -o $< $@
 	touch $@

 # The sample is the first 44000 lines of the full data file.
 sample.txt: itcont.txt
 	head -n 44000 $< > $@
diff --git a/ReadFileJavaApplicationBufferedReader7.java b/ReadFileJavaApplicationBufferedReader7.java
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.time.Duration;
 import java.time.Instant;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.regex.Pattern;

 import static java.util.stream.Collectors.counting;
 import static java.util.stream.Collectors.groupingBy;

 public class ReadFileJavaApplicationBufferedReader7 {
    // character that separates the fields of an input line
    private static final char SEPARATOR = '|';

    // returns the time between startTime and now in milliseconds
    static long between(Instant startTime) {
        return Duration.between(startTime, Instant.now()).toMillis();
    }

    /**
     * Reads the separator-delimited file named by {@code args[0]}, collects
     * the DATE and NAME field of every line and prints statistics about them,
     * together with the time elapsed since start after each step.
     *
     * @param args command-line arguments; {@code args[0]} is the input file
     * @throws IOException if the file cannot be read or a line lacks the
     *                     expected fields
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 1) {
            System.err.println("Usage: java ReadFileJavaApplicationBufferedReader7 <file>");
            System.exit(1);
        }

        Instant startTime = Instant.now();

        try (BufferedReader b = Files.newBufferedReader(Path.of(args[0]))) {
            int[] indexes = {0, 432, 43243};

            List<String> names = new ArrayList<>();
            List<String> dates = new ArrayList<>();
            List<String> firstNames = new ArrayList<>();

            var namePat = Pattern.compile(", \\s*([^, ]+)");
            StringBuilder sb = new StringBuilder(7);

            System.out.println("Reading file using " + Caller.getName());

            /*
             * Line format:
             *
             * 0 | 1 | 2 | 3 | DATE | 5 | 6 | NAME | 8 | ...
             *   ^   ^   ^   ^      ^   ^   ^      ^
             *   1   2   3   4      5   6   7      8
             */
            String readLine;
            int lineNo = 0;
            while ((readLine = b.readLine()) != null) {
                lineNo++;

                // DATE sits between the 4th and 5th separator. Count from the
                // very start of the line: skipping leading characters would
                // only be safe if none of them could be a separator.
                // A valid 4th separator is at index >= 3, so a start index of
                // 0 can only mean that nthIndexOf returned -1.
                int startFieldIdx = 1 + nthIndexOf(readLine, SEPARATOR, 4, -1);
                int endFieldIdx = startFieldIdx == 0 ? -1 : readLine.indexOf(SEPARATOR, startFieldIdx);
                if (endFieldIdx < 0) {
                    throw malformed(lineNo, "no DATE field");
                }

                // extract dates: first four characters, a dash, the next two
                String rawDate = readLine.substring(startFieldIdx, endFieldIdx).strip();
                if (rawDate.length() < 6) {
                    throw malformed(lineNo, "DATE field shorter than 6 characters");
                }
                sb.setLength(0);
                sb.append(rawDate, 0, 4)
                  .append('-')
                  .append(rawDate, 4, 6);
                dates.add(sb.toString());

                // get all the names: NAME starts two separators after the one
                // that closes DATE and ends at the separator following it
                startFieldIdx = 1 + nthIndexOf(readLine, SEPARATOR, 2, endFieldIdx);
                endFieldIdx = startFieldIdx == 0 ? -1 : readLine.indexOf(SEPARATOR, startFieldIdx);
                if (endFieldIdx < 0) {
                    throw malformed(lineNo, "no NAME field");
                }

                String name = readLine.substring(startFieldIdx, endFieldIdx).strip();
                names.add(name);

                // extract first names
                var matcher = namePat.matcher(name);
                if (matcher.find()) {
                    firstNames.add(matcher.group(1));
                }
            }

            for (int i : indexes) {
                if (i < names.size()) {
                    System.out.println("Name: " + names.get(i) + " at index: " + i);
                } else {
                    // short input: report on stderr instead of failing with
                    // an IndexOutOfBoundsException
                    System.err.println("No name at index: " + i + " (only " + names.size() + " lines read)");
                }
            }

            System.out.println("Name time: " + between(startTime) + "ms");

            System.out.println("Total file line count: " + names.size());
            System.out.println("Line count time: " + between(startTime) + "ms");

            Map<String, Long> dateMap = dates.stream()
                    .collect(groupingBy(date -> date, counting()));

            dateMap.forEach((date, count)
                    -> System.out.println("Donations per month and year: " + date + " and donation count: " + count));

            System.out.println("Donations time: " + between(startTime) + "ms");

            Map<String, Long> nameMap = firstNames.stream()
                    .collect(groupingBy(name -> name, counting()));

            // Collections.max throws on an empty collection, so guard it
            if (nameMap.isEmpty()) {
                System.out.println("No first names found.");
            } else {
                Entry<String, Long> common = Collections.max(nameMap.entrySet(), Entry.comparingByValue());

                System.out.println("The most common first name is: " + common.getKey() + " and it occurs: " + common.getValue() + " times.");
            }
            System.out.println("Most common name time: " + between(startTime) + "ms");
        }
    }

    // builds the exception reported for a line that lacks an expected field
    private static IOException malformed(int lineNo, String detail) {
        return new IOException("Malformed record at line " + lineNo + ": " + detail);
    }

    /**
     * Finds the nth occurrence of {@code c} in {@code s} <em>after</em> index
     * {@code start}. To find a character at index 0, call with {@code start}
     * equal to -1.
     *
     * @return the index of that occurrence, -1 if {@code s} has fewer than
     *         {@code n} occurrences after {@code start}, or {@code start}
     *         itself when {@code n} is not positive
     */
    static int nthIndexOf(String s, char c, int n, int start) {
        int x = start;
        while (n-- > 0) {
            x = s.indexOf(c, x + 1);
            if (x < 0) {
                // stop here: continuing would restart the search at index 0
                // and return the position of an unrelated occurrence
                return -1;
            }
        }
        return x;
    }
 }

 class Caller {
    // reports the class name, with any package prefix removed, of whoever
    // called getName()
    public static String getName() {
        // skip the first frame to reach the caller's frame
        var callerFrame = StackWalker.getInstance()
                                     .walk(frames -> frames.skip(1).findFirst());
        if (!callerFrame.isPresent()) {
            return "";
        }

        // drop everything up to and including the last dot
        String qualifiedName = callerFrame.get().getClassName();
        return qualifiedName.replaceFirst("^.*\\.", "");
    }
 }
	# Name of the Java class (and of its .java source file) to compile and run.
	CLASS_NAME := ReadFileJavaApplicationBufferedReader7

	# Delete a target whose recipe failed part-way (e.g. a truncated sample.txt
	# or a partial download) so a broken file is never treated as up to date.
	.DELETE_ON_ERROR:

	# Compile a single Java source file into its class file.
	%.class: %.java
		javac $<

	# Run the program against the small sample file.
	.PHONY: test
	test: $(CLASS_NAME).class sample.txt
		java -Xmx4G $(CLASS_NAME) sample.txt

	# Run the program against the full data file; the file is declared as a
	# prerequisite so it is fetched and extracted first when it is missing.
	.PHONY: run
	run: $(CLASS_NAME).class itcont.txt
		java -Xmx4G $(CLASS_NAME) itcont.txt

	# Convenience alias: download and extract the full data file.
	.PHONY: download-large-file
	download-large-file: itcont.txt

	indiv18.zip:
		wget https://www.fec.gov/files/bulk-downloads/2018/indiv18.zip

	# unzip restores the timestamp stored in the archive, which can be older than
	# the zip file itself; touch the result so make does not consider it stale
	# and re-extract on every run. -o overwrites without prompting so the recipe
	# never blocks waiting for input.
	itcont.txt: indiv18.zip
		unzip -o $< $@
		touch $@

	# The sample is the first 44000 lines of the full data file.
	sample.txt: itcont.txt
		head -n 44000 $< > $@
	import java.io.BufferedReader;
	import java.io.IOException;
	import java.nio.file.Files;
	import java.nio.file.Path;
	import java.time.Duration;
	import java.time.Instant;
	import java.util.ArrayList;
	import java.util.Collections;
	import java.util.List;
	import java.util.Map;
	import java.util.Map.Entry;
	import java.util.regex.Pattern;

	import static java.util.stream.Collectors.counting;
	import static java.util.stream.Collectors.groupingBy;

	public class ReadFileJavaApplicationBufferedReader7 {
	// returns the time between startTime and now in milliseconds
	static long between(Instant startTime) {
	return Duration.between(startTime, Instant.now()).toMillis();
	}

	public static void main(String[] args) throws IOException {
	Instant startTime = Instant.now();

	try (BufferedReader b = Files.newBufferedReader(Path.of(args[0]))) {
	int[] indexes = {0, 432, 43243};

	List<String> names = new ArrayList<>();
	List<String> dates = new ArrayList<>();
	List<String> firstNames = new ArrayList<>();

	var namePat = Pattern.compile(", \\s*([^, ]+)");
	StringBuilder sb = new StringBuilder(7);

	System.out.println("Reading file using " + Caller.getName());

	/*
	* Line format:
	*
	* 0 \| 1 \| 2 \| 3 \| DATE \| 5 \| 6 \| NAME \| 8 \| ...
	* ^ ^ ^ ^ ^ ^ ^ ^
	* 1 2 3 4 5 6 7 8
	*/
	String readLine;
	while ((readLine = b.readLine()) != null) {

	// There are at least 3 separators before the first separator
	// we're interested in so we don't need to check the first 3
	// characters.
	int startFieldIdx = 1 + nthIndexOf(readLine, '\|', 4, 3);
	int endFieldIdx = readLine.indexOf('\|', startFieldIdx);

	// extract dates
	String rawDate = readLine.substring(startFieldIdx, endFieldIdx).strip();
	sb.setLength(0);
	sb.append(rawDate, 0, 4)
	.append('-')
	.append(rawDate, 4, 6);
	dates.add(sb.toString());

	// get all the names
	startFieldIdx = 1 + nthIndexOf(readLine, '\|', 2, endFieldIdx);
	endFieldIdx = readLine.indexOf('\|', startFieldIdx);

	String name = readLine.substring(startFieldIdx, endFieldIdx).strip();
	names.add(name);

	// extract first names
	var matcher = namePat.matcher(name);
	if (matcher.find()) {
	firstNames.add(matcher.group(1));
	}
	}

	for (int i : indexes) {
	System.out.println("Name: " + names.get(i) + " at index: " + i);
	}

	System.out.println("Name time: " + between(startTime) + "ms");

	System.out.println("Total file line count: " + names.size());
	System.out.println("Line count time: " + between(startTime) + "ms");

	Map<String, Long> dateMap = dates.stream()
	.collect(groupingBy(date -> date, counting()));

	dateMap.forEach((date, count)
	-> System.out.println("Donations per month and year: " + date + " and donation count: " + count));

	System.out.println("Donations time: " + between(startTime) + "ms");

	Map<String, Long> nameMap = firstNames.stream()
	.collect(groupingBy(name -> name, counting()));

	Entry<String, Long> common = Collections.max(nameMap.entrySet(), Entry.comparingByValue());

	System.out.println("The most common first name is: " + common.getKey() + " and it occurs: " + common.getValue() + " times.");
	System.out.println("Most common name time: " + between(startTime) + "ms");
	}
	}

	/**
	* Finds the nth occurrence of {@code c} in {@code s} <em>after</em> index
	* {@code start}. To find a character at index 0, call with {@code start}
	* equal to -1.
	*/
	static int nthIndexOf(String s, char c, int n, int start) {
	int x = start;
	while (n-- > 0) {
	x = s.indexOf(c, x + 1);
	}
	return x;
	}
	}

	class Caller {
	    // reports the class name, with any package prefix removed, of whoever
	    // called getName()
	    public static String getName() {
	        // skip the first frame to reach the caller's frame
	        var callerFrame = StackWalker.getInstance()
	                                     .walk(frames -> frames.skip(1).findFirst());
	        if (!callerFrame.isPresent()) {
	            return "";
	        }

	        // drop everything up to and including the last dot
	        String qualifiedName = callerFrame.get().getClassName();
	        return qualifiedName.replaceFirst("^.*\\.", "");
	    }
	}