Created
February 13, 2020 18:24
-
-
Save dag05ru/43576c3617cbeee4fd2d48d14f2006b7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.ArrayList; | |
import java.util.HashMap; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.concurrent.ExecutorService; | |
import java.util.concurrent.Executors; | |
import java.util.concurrent.Future; | |
import java.util.stream.Collectors; | |
public class FastSplitter { | |
private ExecutorService executor; | |
public void solution(String input) { | |
// находим все переходы на новую строку что бы в даньнейшем | |
// разбить на примерно равные подмножества | |
List<Integer> delimiterPos = new ArrayList<>(); | |
for(int i = 0; i < input.length(); i++) { | |
if(input.charAt(i) == '\n') { | |
delimiterPos.add(i); | |
} | |
} | |
// если сообщение короткое то считам в один поток | |
if(delimiterPos.size() < 5) { | |
print(calculate(input, 0, input.length())); | |
return; | |
} | |
System.out.println("mult"); | |
// вычисляем количество ядер (потоков) | |
// и решаем подзадачи | |
int cpuCount = Runtime.getRuntime().availableProcessors(); | |
executor = Executors.newFixedThreadPool(cpuCount); | |
int len = delimiterPos.size() / cpuCount; | |
List<Future<Map<Integer, List<String>>>> futures = new ArrayList<>(); | |
int start = 0; | |
int end = 0; | |
for(int i = 1; i < cpuCount; i++) { | |
start = end; | |
end = delimiterPos.get(i*len); | |
futures.add(submit(input, start, end)); | |
} | |
futures.add(submit(input, end, input.length())); | |
// ждем пока завершатся все потоки | |
while(true) { | |
boolean flag = true; | |
for(Future<Map<Integer, List<String>>> f: futures) { | |
if(!f.isDone()) { | |
flag = false; | |
break; | |
} | |
} | |
if(flag) break; | |
} | |
// складываем результаты | |
try { | |
Map<Integer, List<String>> mRes = futures.get(0).get(); | |
for (int i = 1; i < futures.size(); i++) { | |
merge(mRes, futures.get(i).get()); | |
} | |
print(mRes); | |
} catch (Exception e) { | |
System.err.println(e); | |
} | |
} | |
public void merge(Map<Integer, List<String>> m1, Map<Integer, List<String>> m2) { | |
m2.keySet().stream().forEach(key -> { | |
if (m1.containsKey(key)) { | |
m1.get(key).addAll(m2.get(key)); | |
} else { | |
m1.put(key, m2.get(key)); | |
} | |
}); | |
} | |
public void print(Map<Integer, List<String>> res) { | |
for(Integer key: res.keySet()) { | |
for(String s: res.get(key)) { | |
System.out.println(s); | |
} | |
System.out.println("------------------"); | |
} | |
} | |
public Future<Map<Integer, List<String>>> submit(String input, int start, int end) { | |
return executor.submit(() -> { | |
return calculate(input, start, end); | |
}); | |
} | |
public Map<Integer, List<String>> calculate(String input, int start, int end) { | |
Map<Integer, List<String>> result = new HashMap<>(); | |
List<List<String>> sac = splitAndClear(input, start, end); | |
sac.stream().forEach( line -> { | |
int key = calcHashCode(line); | |
String clearLine = line.stream().collect(Collectors.joining(", ")); | |
if (result.containsKey(key)) { | |
result.get(key).add(clearLine); | |
} else { | |
List<String> lists = new ArrayList<>(); | |
lists.add(clearLine); | |
result.put(key, lists); | |
} | |
}); | |
return result; | |
} | |
// Очищаем и делим строку на списки строк за один проход | |
public List<List<String>> splitAndClear(String input, int start, int end) { | |
List<List<String>> result = new ArrayList<>(); | |
boolean prevIsW = false; | |
boolean newLine = true; | |
StringBuilder word = new StringBuilder(); | |
List<String> line = new ArrayList<>(); | |
for(int i = start; i < end; i++){ | |
if(newLine) line = new ArrayList<>(); | |
char c = input.charAt(i); | |
if(Character.isLetterOrDigit(c)) { | |
newLine = false; | |
if (!prevIsW) { | |
word = new StringBuilder(); | |
word.append(c); | |
prevIsW = true; | |
} else { | |
word.append(c); | |
} | |
if(i == input.length() - 1) { | |
line.add(word.toString()); | |
} | |
} else { | |
newLine = false; | |
if(prevIsW) { | |
line.add(word.toString()); | |
} | |
if((c == '\n' || i == input.length() - 1) && !line.isEmpty()) { | |
result.add(line); | |
newLine = true; | |
} | |
prevIsW = false; | |
} | |
} | |
return result; | |
} | |
// вычисляем hashcode от массива | |
public int calcHashCode(List<String> strings) { | |
final int prime = 31; | |
int result = 1; | |
List<String> sortedStrs = strings.stream().sorted().collect(Collectors.toList()); | |
for( String s : sortedStrs ) | |
{ | |
result = result * prime + s.hashCode(); | |
} | |
return result; | |
} | |
// тест | |
public static void main(String[] args) { | |
String data ="раз,2, три,\n" + | |
"елочка, гори!\n" + | |
"три, 2, раз...\n" + | |
"лысый дикобраз\n" + | |
"***\n"; | |
StringBuilder sb = new StringBuilder(); | |
for(int i = 0; i < 4000; i++) { | |
sb.append(data); | |
} | |
String bigLine = sb.toString(); | |
long start = System.currentTimeMillis(); | |
new FastSplitter().solution(bigLine); | |
System.out.println("fast1: " + (System.currentTimeMillis() - start)); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment