dhagan · September 29, 2015 00:31 · sara251186 · Nov 24, 2021
diff --git a/concordance.java b/concordance.java
 import java.io.*;
 import java.nio.file.*;
 import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 /**
  * Builds and prints a concordance for a text file: every distinct word
  * (case-insensitive), how often it occurs, and the 1-based numbers of the
  * sentences it occurs in.
  *
  * Usage is: java concordance <<filename>>
  */
 public class concordance {

     /**
      * Matches a single word: a run of non-whitespace that starts and ends on
      * a word boundary, optionally followed by one literal period when the
      * run ends in "period + word character" (so "i.e." keeps its final dot).
      * The trailing period is escaped on purpose; an unescaped dot would
      * swallow whatever character follows tokens such as "v1.2".
      */
     private static final Pattern WORD_PATTERN =
             Pattern.compile("((\\b[^\\s]+\\b)((?<=\\.\\w)\\.)?)");

     /**
      * Matches the whitespace between sentences: whitespace preceded by one of
      * '.', '!' or '?' and followed by an upper-case ASCII letter.
      */
     private static final Pattern SENTENCE_BOUNDARY =
             Pattern.compile("(?<=[.!?])\\s+(?=[A-Z])");

     /**
      * Program entry point. Prints a usage message and exits when no file
      * name is given, otherwise delegates to {@link #_main(String[])}.
      *
      * @param args command-line arguments; args[0] is the path of the text file
      */
     public static void main(String[] args) {
         if (args.length == 0) {
             System.out.println("Proper Usage is: java concordance <<filename>>");
             System.exit(0);
         }

         _main(args);
     }

     /**
      * Reads the file named by args[0], builds its concordance and prints it
      * in alphabetical order of the (lower-cased) words.
      *
      * @param args command-line arguments; args[0] is the path of the text file
      */
     static void _main(String[] args) {
         String myText;
         try {
             // deficiency - the whole file is held in memory; a streaming
             // strategy would be needed for very large inputs.
             // Decode explicitly as UTF-8 so results do not depend on the
             // platform default charset.
             myText = new String(Files.readAllBytes(Paths.get(args[0])),
                     java.nio.charset.StandardCharsets.UTF_8);
         } catch (IOException e) {
             // Any I/O failure while reading is reported with this message.
             System.out.print(args[0] + " File not found! Please check that the file exists.");
             return;
         }

         printMap(buildConcordance(myText));
     }

     /**
      * Builds the concordance of a text blob.
      *
      * @param text the full text to analyse
      * @return a map sorted by word; each value holds the word's total count
      *         and one sentence number per occurrence (sentences are numbered
      *         from 1 in the order produced by {@link #splitSentences(String)})
      */
     static Map<String, WordInfo> buildConcordance(String text) {
         Map<String, WordInfo> concordanceMap = new TreeMap<>();
         int sentenceNumber = 1;
         for (String sentence : splitSentences(text)) {
             for (String word : splitWords(sentence)) {
                 // Locale.ROOT keeps the keys independent of the JVM's default locale.
                 String key = word.toLowerCase(Locale.ROOT);
                 WordInfo info = concordanceMap.get(key);
                 if (info == null) {
                     concordanceMap.put(key, new WordInfo(key, sentenceNumber));
                 } else {
                     info.WordCount++;
                     info.SentenceNumbers.add(sentenceNumber);
                 }
             }
             sentenceNumber++;
         }
         return concordanceMap;
     }

     /**
      * Prints one line per map entry in the map's iteration order, formatted
      * as the key left-justified in 20 columns followed by
      * {count:comma-separated sentence numbers}.
      *
      * @param map concordance entries to print
      */
     public static void printMap(Map<String, WordInfo> map) {
         for (Map.Entry<String, WordInfo> entry : map.entrySet()) {
             WordInfo wordInfo = entry.getValue();
             StringJoiner sentenceNumbers = new StringJoiner(",");
             for (Integer number : wordInfo.SentenceNumbers) {
                 sentenceNumbers.add(number.toString());
             }
             String value = "{" + wordInfo.WordCount + ":" + sentenceNumbers.toString() + "}";
             System.out.println(String.format("%-20s %s", entry.getKey(), value));
         }
     }

     /**
      * Splits a sentence into words, keeping the trailing period of dotted
      * abbreviations such as "i.e." while dropping other surrounding
      * punctuation.
      *
      * please note I have used stack overflow suggestions for the regex
      *
      * @param sentence the sentence to tokenize
      * @return the words in order of appearance; empty when nothing matches
      */
     static List<String> splitWords(String sentence) {
         List<String> allMatches = new ArrayList<>();
         Matcher matcher = WORD_PATTERN.matcher(sentence);
         while (matcher.find()) {
             allMatches.add(matcher.group());
         }
         return allMatches;
     }

     /**
      * Splits a text blob into sentences. A split happens at whitespace that
      * follows '.', '!' or '?' and precedes an upper-case ASCII letter; the
      * terminating punctuation stays with the preceding sentence.
      *
      * @param text the text to split
      * @return the sentences in order of appearance
      */
     static String[] splitSentences(String text) {
         return SENTENCE_BOUNDARY.split(text);
     }

     /**
      * hold class for word count info,
      * deficiency - next iteration abstract public members
      */
     public static class WordInfo {
         /** The (lower-cased) word this entry describes. */
         public String Word;
         /** Total number of occurrences recorded so far. */
         public int WordCount;
         /** One sentence number per recorded occurrence, in insertion order. */
         public List<Integer> SentenceNumbers = new ArrayList<>();

         /**
          * Creates the entry for a word's first occurrence.
          *
          * @param word the word
          * @param sentenceNumber number of the sentence containing the first occurrence
          */
         public WordInfo(String word, int sentenceNumber) {
             Word = word;
             WordCount = 1;
             SentenceNumbers.add(sentenceNumber);
         }
     }
 }
	import java.io.*;
	import java.nio.file.*;
	import java.util.*;
	import java.util.regex.Matcher;
	import java.util.regex.Pattern;

	/**
	 * Builds and prints a concordance for a text file: every distinct word
	 * (case-insensitive), how often it occurs, and the 1-based numbers of the
	 * sentences it occurs in.
	 *
	 * Usage is: java concordance <<filename>>
	 */
	public class concordance {

	    /**
	     * Matches a single word: a run of non-whitespace that starts and ends on
	     * a word boundary, optionally followed by one literal period when the
	     * run ends in "period + word character" (so "i.e." keeps its final dot).
	     * The trailing period is escaped on purpose; an unescaped dot would
	     * swallow whatever character follows tokens such as "v1.2".
	     */
	    private static final Pattern WORD_PATTERN =
	            Pattern.compile("((\\b[^\\s]+\\b)((?<=\\.\\w)\\.)?)");

	    /**
	     * Matches the whitespace between sentences: whitespace preceded by one of
	     * '.', '!' or '?' and followed by an upper-case ASCII letter.
	     */
	    private static final Pattern SENTENCE_BOUNDARY =
	            Pattern.compile("(?<=[.!?])\\s+(?=[A-Z])");

	    /**
	     * Program entry point. Prints a usage message and exits when no file
	     * name is given, otherwise delegates to {@link #_main(String[])}.
	     *
	     * @param args command-line arguments; args[0] is the path of the text file
	     */
	    public static void main(String[] args) {
	        if (args.length == 0) {
	            System.out.println("Proper Usage is: java concordance <<filename>>");
	            System.exit(0);
	        }

	        _main(args);
	    }

	    /**
	     * Reads the file named by args[0], builds its concordance and prints it
	     * in alphabetical order of the (lower-cased) words.
	     *
	     * @param args command-line arguments; args[0] is the path of the text file
	     */
	    static void _main(String[] args) {
	        String myText;
	        try {
	            // deficiency - the whole file is held in memory; a streaming
	            // strategy would be needed for very large inputs.
	            // Decode explicitly as UTF-8 so results do not depend on the
	            // platform default charset.
	            myText = new String(Files.readAllBytes(Paths.get(args[0])),
	                    java.nio.charset.StandardCharsets.UTF_8);
	        } catch (IOException e) {
	            // Any I/O failure while reading is reported with this message.
	            System.out.print(args[0] + " File not found! Please check that the file exists.");
	            return;
	        }

	        printMap(buildConcordance(myText));
	    }

	    /**
	     * Builds the concordance of a text blob.
	     *
	     * @param text the full text to analyse
	     * @return a map sorted by word; each value holds the word's total count
	     *         and one sentence number per occurrence (sentences are numbered
	     *         from 1 in the order produced by {@link #splitSentences(String)})
	     */
	    static Map<String, WordInfo> buildConcordance(String text) {
	        Map<String, WordInfo> concordanceMap = new TreeMap<>();
	        int sentenceNumber = 1;
	        for (String sentence : splitSentences(text)) {
	            for (String word : splitWords(sentence)) {
	                // Locale.ROOT keeps the keys independent of the JVM's default locale.
	                String key = word.toLowerCase(Locale.ROOT);
	                WordInfo info = concordanceMap.get(key);
	                if (info == null) {
	                    concordanceMap.put(key, new WordInfo(key, sentenceNumber));
	                } else {
	                    info.WordCount++;
	                    info.SentenceNumbers.add(sentenceNumber);
	                }
	            }
	            sentenceNumber++;
	        }
	        return concordanceMap;
	    }

	    /**
	     * Prints one line per map entry in the map's iteration order, formatted
	     * as the key left-justified in 20 columns followed by
	     * {count:comma-separated sentence numbers}.
	     *
	     * @param map concordance entries to print
	     */
	    public static void printMap(Map<String, WordInfo> map) {
	        for (Map.Entry<String, WordInfo> entry : map.entrySet()) {
	            WordInfo wordInfo = entry.getValue();
	            StringJoiner sentenceNumbers = new StringJoiner(",");
	            for (Integer number : wordInfo.SentenceNumbers) {
	                sentenceNumbers.add(number.toString());
	            }
	            String value = "{" + wordInfo.WordCount + ":" + sentenceNumbers.toString() + "}";
	            System.out.println(String.format("%-20s %s", entry.getKey(), value));
	        }
	    }

	    /**
	     * Splits a sentence into words, keeping the trailing period of dotted
	     * abbreviations such as "i.e." while dropping other surrounding
	     * punctuation.
	     *
	     * please note I have used stack overflow suggestions for the regex
	     *
	     * @param sentence the sentence to tokenize
	     * @return the words in order of appearance; empty when nothing matches
	     */
	    static List<String> splitWords(String sentence) {
	        List<String> allMatches = new ArrayList<>();
	        Matcher matcher = WORD_PATTERN.matcher(sentence);
	        while (matcher.find()) {
	            allMatches.add(matcher.group());
	        }
	        return allMatches;
	    }

	    /**
	     * Splits a text blob into sentences. A split happens at whitespace that
	     * follows '.', '!' or '?' and precedes an upper-case ASCII letter; the
	     * terminating punctuation stays with the preceding sentence.
	     *
	     * @param text the text to split
	     * @return the sentences in order of appearance
	     */
	    static String[] splitSentences(String text) {
	        return SENTENCE_BOUNDARY.split(text);
	    }

	    /**
	     * hold class for word count info,
	     * deficiency - next iteration abstract public members
	     */
	    public static class WordInfo {
	        /** The (lower-cased) word this entry describes. */
	        public String Word;
	        /** Total number of occurrences recorded so far. */
	        public int WordCount;
	        /** One sentence number per recorded occurrence, in insertion order. */
	        public List<Integer> SentenceNumbers = new ArrayList<>();

	        /**
	         * Creates the entry for a word's first occurrence.
	         *
	         * @param word the word
	         * @param sentenceNumber number of the sentence containing the first occurrence
	         */
	        public WordInfo(String word, int sentenceNumber) {
	            Word = word;
	            WordCount = 1;
	            SentenceNumbers.add(sentenceNumber);
	        }
	    }
	}
No results found