Last active
August 29, 2015 14:24
-
-
Save rmuhamedgaliev/37ca6fabf529562acd7f to your computer and use it in GitHub Desktop.
Parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package io.github.rmuhamedgaliev; | |
import java.io.BufferedWriter; | |
import java.io.File; | |
import java.io.FileWriter; | |
import java.io.IOException; | |
import java.nio.file.Files; | |
import java.nio.file.Paths; | |
import java.util.*; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
import java.util.stream.Collectors; | |
/**
 * Collects every file below the {@code txt} directory, extracts the numbered
 * headings (for example {@code 1.1 Title}) from each of them and groups the
 * headings into sections keyed by their number.
 *
 * @author <a href="mailto:[email protected]">Rinat Muhamedgaliev</a>
 */
public class Converter {

    /**
     * Matches a complete line that consists of a dotted number (one to ten
     * groups of up to five digits, e.g. {@code 1.}, {@code 1.1}, {@code 1.2.3.}),
     * at least one space, and a text made only of Cyrillic letters
     * (U+0430-U+044F, U+0410-U+042F), Latin letters, digits, spaces and the
     * characters {@code _ , : ( ) [ ] . -}.
     *
     * <p>The Cyrillic ranges are written as Unicode escapes so that the
     * resulting pattern does not depend on the encoding javac assumes for
     * this source file. Compiled once because {@link Pattern} is immutable.
     */
    private static final Pattern NUMBERED_HEADING = Pattern.compile(
            "^(\\d{1,5}\\.(\\d{1,5})?){1,10}( +)([\u0430-\u044F\u0410-\u042Fa-zA-Z0-9_,:\\(\\)\\[\\].\\- ]*)$");

    /** Regular files found so far by {@link #listFilesForFolder(File)}. */
    private static final List<File> files = new ArrayList<>();

    /**
     * Entry point: walks the {@code txt} directory (relative to the working
     * directory), parses every file found and prints {@code finish} after
     * each one. The parsed structure itself is currently discarded.
     *
     * @param args ignored
     * @throws IOException declared for callers; the I/O errors of the
     *                     individual files are reported and skipped
     */
    public static void main(String[] args) throws IOException {
        listFilesForFolder(Paths.get("txt").toFile());
        for (File file : files) {
            readTextFromFile(file);
            System.out.println("finish");
        }
    }

    /**
     * Recursively adds every non-directory entry below {@code folder} to
     * {@link #files}.
     *
     * @param folder directory to scan; if it does not exist, is not a
     *               directory or cannot be read, a message is printed and
     *               nothing is added
     */
    private static void listFilesForFolder(final File folder) {
        // File.listFiles() returns null instead of throwing when the path is
        // not a directory or an I/O error occurs.
        final File[] entries = folder.listFiles();
        if (entries == null) {
            System.err.println("Cannot list directory: " + folder);
            return;
        }
        for (final File entry : entries) {
            if (entry.isDirectory()) {
                listFilesForFolder(entry);
            } else {
                files.add(entry);
            }
        }
    }

    /**
     * Reads {@code file} as UTF-8 text and returns its numbered headings
     * grouped by {@link #filterNumbers(Map)}.
     *
     * @param file file to read
     * @return the grouped headings; an empty map when the file cannot be read
     */
    private static Map<String, Object> readTextFromFile(File file) {
        final Map<String, String> paragraphs;
        try {
            paragraphs = parseLines(Files.readAllLines(file.toPath()));
        } catch (IOException e) {
            // Best effort: report the unreadable file and continue with the
            // others instead of failing later on a null map.
            System.err.println("Cannot read " + file + ": " + e);
            return new TreeMap<>();
        }
        return filterNumbers(paragraphs);
    }

    /**
     * Returns the part of {@code text} that precedes its first space.
     *
     * @param text text to cut
     * @return everything before the first space; the whole text when it
     *         contains no space; an empty string when it starts with a space
     */
    private static String extractNumberFromText(String text) {
        // indexOf instead of split(" ")[0]: split returns an empty array for
        // a string made only of spaces, which made the index access throw.
        final int firstSpace = text.indexOf(' ');
        return firstSpace < 0 ? text : text.substring(0, firstSpace);
    }

    /**
     * Groups headings by number. A number becomes a <em>parent</em> when at
     * least one other key starts with {@code number + "."}.
     *
     * <p>The returned sorted map contains
     * <ul>
     *   <li>for every parent: a sorted {@code Map<String, String>} holding the
     *       parent's own text under its own number plus every descendant that
     *       is not already a descendant of another descendant parent;</li>
     *   <li>for every number that is neither a parent nor a descendant of a
     *       parent: its text as a {@code String}.</li>
     * </ul>
     *
     * <p>NOTE(review): a key that already ends with a dot (e.g. {@code "1."})
     * can never become a parent because the prefix searched for is then
     * {@code "1.."} - confirm whether that is intended.
     *
     * @param numbers heading text by number; not modified
     * @return the grouped structure described above, never {@code null}
     */
    private static Map<String, Object> filterNumbers(Map<String, String> numbers) {
        final Map<String, Object> grouped = new TreeMap<>();
        if (numbers == null || numbers.isEmpty()) {
            return grouped;
        }

        // Step 1: for every number collect all keys nested below it.
        final Map<String, Map<String, String>> descendantsByParent = new TreeMap<>();
        final Set<String> inSomeSection = new HashSet<>();
        for (String number : numbers.keySet()) {
            final String childPrefix = number + ".";
            final Map<String, String> descendants = new TreeMap<>();
            for (Map.Entry<String, String> candidate : numbers.entrySet()) {
                if (candidate.getKey().startsWith(childPrefix)) {
                    descendants.put(candidate.getKey(), candidate.getValue());
                }
            }
            if (!descendants.isEmpty()) {
                descendantsByParent.put(number, descendants);
                inSomeSection.add(number);
                inSomeSection.addAll(descendants.keySet());
            }
        }

        // Step 2: headings outside of any section stay plain strings.
        for (Map.Entry<String, String> heading : numbers.entrySet()) {
            if (!inSomeSection.contains(heading.getKey())) {
                grouped.put(heading.getKey(), heading.getValue());
            }
        }

        // Step 3: build each section from its descendants, dropping those
        // that belong to a nested parent, then add the section's own text.
        for (Map.Entry<String, Map<String, String>> parent : descendantsByParent.entrySet()) {
            final Map<String, String> section = new TreeMap<>(parent.getValue());
            for (String descendant : parent.getValue().keySet()) {
                final Map<String, String> nested = descendantsByParent.get(descendant);
                if (nested != null) {
                    section.keySet().removeAll(nested.keySet());
                }
            }
            section.put(parent.getKey(), numbers.get(parent.getKey()));
            grouped.put(parent.getKey(), section);
        }
        return grouped;
    }

    /**
     * Extracts the numbered headings from {@code lines}.
     *
     * <p>{@code null} and empty lines are skipped, as are lines containing
     * {@code ".."} anywhere after the first character. Every remaining line
     * that matches {@link #NUMBERED_HEADING} is stored under its number (the
     * text before the first space); the stored value is the rest of the line
     * starting at that first space, so it keeps its leading space(s). When a
     * number occurs more than once the last occurrence wins.
     *
     * @param lines lines of one document
     * @return heading text by number
     */
    private static Map<String, String> parseLines(List<String> lines) {
        final Map<String, String> paragraphs = new HashMap<>();
        for (String line : lines) {
            if (line == null || line.isEmpty() || line.indexOf("..") > 0) {
                continue;
            }
            final Matcher matcher = NUMBERED_HEADING.matcher(line);
            if (matcher.find()) {
                final String heading = matcher.group();
                paragraphs.put(
                        extractNumberFromText(heading),
                        heading.substring(heading.indexOf(" ")));
            }
        }
        return paragraphs;
    }

    /**
     * Writes {@code content} as UTF-8 to {@code out/<filname>}, replacing an
     * existing file and creating the target directory when it is missing.
     * Failures are reported on standard error and otherwise ignored.
     *
     * @param filname name of the file below the {@code out} directory
     * @param content text to write
     */
    private static void writeTOFile(String filname, String content) {
        final File target = new File("out/" + filname);
        try {
            final File parent = target.getParentFile();
            if (parent != null) {
                Files.createDirectories(parent.toPath());
            }
            // Files.newBufferedWriter(Path) always encodes as UTF-8, matching
            // Files.readAllLines(Path) used for the input; FileWriter would
            // use the platform default charset.
            try (BufferedWriter writer = Files.newBufferedWriter(target.toPath())) {
                writer.write(content);
            }
        } catch (IOException e) {
            System.err.println("Cannot write " + target + ": " + e);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment