Last active
December 3, 2018 19:42
-
-
Save kdgregory/42f810438301592e2b1b004e3bf11ed6 to your computer and use it in GitHub Desktop.
Examples for Logging presentation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# logging pipeline
log4j.appender.kinesis=com.kdgregory.log4j.aws.KinesisAppender
log4j.appender.kinesis.streamName=LoggingExample
log4j.appender.kinesis.batchDelay=200
log4j.appender.kinesis.layout=com.kdgregory.log4j.aws.JsonLayout
log4j.appender.kinesis.layout.tags=applicationName=Example,runDate={date},env=spark-dev
log4j.appender.kinesis.layout.enableHostname=true
log4j.appender.kinesis.layout.enableLocation=true
log4j.logger.com.kdgregory.sandbox=DEBUG
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright (c) Keith D Gregory, all rights reserved | |
package com.kdgregory.sandbox.spark.concordance; | |
import java.io.FileOutputStream; | |
import java.io.OutputStreamWriter; | |
import java.io.PrintWriter; | |
import java.util.List; | |
import org.apache.spark.SparkContext; | |
import org.apache.spark.api.java.JavaRDD; | |
import org.slf4j.Logger; | |
import org.slf4j.LoggerFactory; | |
import org.slf4j.MDC; | |
import scala.Tuple2; | |
/** | |
* Creates a simple concodance: a list of words with the files that they came from. | |
* <p> | |
* To invoke, pass two arguments: a directory containing the files to be parsed, and | |
* an output file. Note that the input files must be available on all nodes (eg, an | |
* HDFS or S3 filesystem), and that the output file will be written to the master. | |
*/ | |
public class Main | |
{ | |
private static Logger logger = LoggerFactory.getLogger(Main.class); | |
public static void main(String[] argv) | |
throws Exception | |
{ | |
SparkContext sc = new SparkContext(); | |
MDC.put("appName", sc.appName()); | |
MDC.put("applicationId", sc.applicationId()); | |
logger.info("starting job: {}", argv[0]); | |
JavaRDD<Tuple2<String,String>> rdd = sc.wholeTextFiles(argv[0], 8).toJavaRDD(); | |
List<Tuple2<String,Iterable<String>>> results = | |
rdd.flatMap(new Parser(sc.appName(), sc.applicationId())) | |
.distinct() | |
.mapToPair(t -> t.swap()) | |
.groupByKey() | |
.sortByKey() | |
.collect(); | |
logger.debug("total number of distinct words: {}", results.size()); | |
logger.debug("writing output file: {}", argv[1]); | |
try (FileOutputStream fos = new FileOutputStream(argv[1])) | |
{ | |
PrintWriter out = new PrintWriter(new OutputStreamWriter(fos, "UTF-8")); | |
for (Tuple2<String,Iterable<String>> entry : results) | |
{ | |
out.print(entry._1()); | |
out.print(": "); | |
for (String value : entry._2()) | |
{ | |
out.print(value); | |
out.print(" "); | |
} | |
out.println(); | |
} | |
} | |
logger.info("finished job"); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright (c) Keith D Gregory, all rights reserved | |
package com.kdgregory.sandbox.spark.concordance; | |
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;

import org.apache.spark.api.java.function.FlatMapFunction;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;

import scala.Tuple2;
/** | |
* This function accepts a tuple containg filename and file text and extracts | |
* the words from that text. A word consists of alphanumeric characters and | |
* dashes, and is returned in lowercase.. | |
*/ | |
public class Parser | |
implements FlatMapFunction<Tuple2<String,String>,Tuple2<String,String>> | |
{ | |
private static final long serialVersionUID = 1L; | |
private static final Logger logger = LoggerFactory.getLogger(Parser.class); | |
private String appName; | |
private String applicationId; | |
public Parser(String appName, String applicationId) | |
{ | |
this.appName = appName; | |
this.applicationId = applicationId; | |
} | |
@Override | |
public Iterator<Tuple2<String,String>> call(Tuple2<String,String> arg) | |
throws Exception | |
{ | |
MDC.put("appName", appName); | |
MDC.put("applicationId", applicationId); | |
String filename = arg._1().replaceAll(".*\\/", "").replaceAll(".txt$", ""); | |
MDC.put("filename", filename); | |
logger.info("source file: {}", arg._1()); | |
String text = arg._2(); | |
logger.debug("{} characters", text.length()); | |
String[] words = text.replaceAll("--", " ").split("\\s+"); | |
logger.debug("{} words", words.length); | |
List<Tuple2<String,String>> output = new ArrayList<>(words.length); | |
for (String word : words) | |
{ | |
String cleanWord = word.toLowerCase().replaceAll("[^a-z0-9-]", ""); | |
output.add(new Tuple2<>(filename, cleanWord)); | |
} | |
logger.debug("file complete"); | |
return output.iterator(); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2018-05-16 12:04:03 INFO CoarseGrainedExecutorBackend:2608 - Started daemon with process name: 23900@ip-172-30-1-93 | |
2018-05-16 12:04:03 INFO SignalUtils:54 - Registered signal handler for TERM | |
2018-05-16 12:04:03 INFO SignalUtils:54 - Registered signal handler for HUP | |
2018-05-16 12:04:03 INFO SignalUtils:54 - Registered signal handler for INT | |
2018-05-16 12:04:04 WARN NativeCodeLoader:62 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable | |
2018-05-16 12:04:04 INFO SecurityManager:54 - Changing view acls to: ec2-user | |
2018-05-16 12:04:04 INFO SecurityManager:54 - Changing modify acls to: ec2-user | |
2018-05-16 12:04:04 INFO SecurityManager:54 - Changing view acls groups to: | |
2018-05-16 12:04:04 INFO SecurityManager:54 - Changing modify acls groups to: | |
2018-05-16 12:04:04 INFO SecurityManager:54 - SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(ec2-user); groups with view permissions: Set(); users with modify permissions: Set(ec2-user); groups with modify permissions: Set() | |
2018-05-16 12:04:05 INFO TransportClientFactory:267 - Successfully created connection to master/172.30.1.17:43871 after 110 ms (0 ms spent in bootstraps) | |
2018-05-16 12:04:05 INFO SecurityManager:54 - Changing view acls to: ec2-user | |
2018-05-16 12:04:05 INFO SecurityManager:54 - Changing modify acls to: ec2-user | |
2018-05-16 12:04:05 INFO SecurityManager:54 - Changing view acls groups to: | |
2018-05-16 12:04:05 INFO SecurityManager:54 - Changing modify acls groups to: | |
2018-05-16 12:04:05 INFO SecurityManager:54 - SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(ec2-user); groups with view permissions: Set(); users with modify permissions: Set(ec2-user); groups with modify permissions: Set() | |
2018-05-16 12:04:05 INFO TransportClientFactory:267 - Successfully created connection to master/172.30.1.17:43871 after 2 ms (0 ms spent in bootstraps) | |
2018-05-16 12:04:05 INFO DiskBlockManager:54 - Created local directory at /tmp/spark-1ebaea1b-9509-4c93-bfdf-6600f351d290/executor-70e1e187-2494-4996-b0b0-e441ba4f71d4/blockmgr-f93725aa-1880-42f6-99d8-cd5d68efc551 | |
2018-05-16 12:04:05 INFO MemoryStore:54 - MemoryStore started with capacity 413.9 MB | |
2018-05-16 12:04:05 INFO CoarseGrainedExecutorBackend:54 - Connecting to driver: spark://CoarseGrainedScheduler@master:43871 | |
2018-05-16 12:04:05 INFO WorkerWatcher:54 - Connecting to worker spark://[email protected]:37499 | |
2018-05-16 12:04:05 INFO TransportClientFactory:267 - Successfully created connection to /172.30.1.93:37499 after 26 ms (0 ms spent in bootstraps) | |
2018-05-16 12:04:05 INFO WorkerWatcher:54 - Successfully connected to spark://[email protected]:37499 | |
2018-05-16 12:04:05 INFO CoarseGrainedExecutorBackend:54 - Successfully registered with driver | |
2018-05-16 12:04:05 INFO Executor:54 - Starting executor ID 0 on host 172.30.1.93 | |
2018-05-16 12:04:05 INFO Utils:54 - Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 45229. | |
2018-05-16 12:04:05 INFO NettyBlockTransferService:54 - Server created on 172.30.1.93:45229 | |
2018-05-16 12:04:05 INFO BlockManager:54 - Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy | |
2018-05-16 12:04:05 INFO BlockManagerMaster:54 - Registering BlockManager BlockManagerId(0, 172.30.1.93, 45229, None) | |
2018-05-16 12:04:05 INFO BlockManagerMaster:54 - Registered BlockManager BlockManagerId(0, 172.30.1.93, 45229, None) | |
2018-05-16 12:04:05 INFO BlockManager:54 - Initialized BlockManager: BlockManagerId(0, 172.30.1.93, 45229, None) | |
2018-05-16 12:04:06 INFO CoarseGrainedExecutorBackend:54 - Got assigned task 0 | |
2018-05-16 12:04:06 INFO Executor:54 - Running task 0.0 in stage 0.0 (TID 0) | |
2018-05-16 12:04:06 INFO Executor:54 - Fetching spark://master:43871/jars/sandbox-spark-2.3.0-SNAPSHOT.jar with timestamp 1526472242478 | |
2018-05-16 12:04:06 INFO TransportClientFactory:267 - Successfully created connection to master/172.30.1.17:43871 after 9 ms (0 ms spent in bootstraps) | |
2018-05-16 12:04:06 INFO Utils:54 - Fetching spark://master:43871/jars/sandbox-spark-2.3.0-SNAPSHOT.jar to /tmp/spark-1ebaea1b-9509-4c93-bfdf-6600f351d290/executor-70e1e187-2494-4996-b0b0-e441ba4f71d4/spark-2d118c23-7f61-45cd-be48-760a67bb620e/fetchFileTemp8957172668352263863.tmp | |
2018-05-16 12:04:06 INFO Utils:54 - Copying /tmp/spark-1ebaea1b-9509-4c93-bfdf-6600f351d290/executor-70e1e187-2494-4996-b0b0-e441ba4f71d4/spark-2d118c23-7f61-45cd-be48-760a67bb620e/13083391631526472242478_cache to /home/ec2-user/spark-2.3.0-bin-hadoop2.7/work/app-20180516120402-0004/0/./sandbox-spark-2.3.0-SNAPSHOT.jar | |
2018-05-16 12:04:06 INFO Executor:54 - Adding file:/home/ec2-user/spark-2.3.0-bin-hadoop2.7/work/app-20180516120402-0004/0/./sandbox-spark-2.3.0-SNAPSHOT.jar to class loader | |
2018-05-16 12:04:06 INFO TorrentBroadcast:54 - Started reading broadcast variable 1 | |
2018-05-16 12:04:06 INFO TransportClientFactory:267 - Successfully created connection to master/172.30.1.17:42887 after 2 ms (0 ms spent in bootstraps) | |
2018-05-16 12:04:06 INFO MemoryStore:54 - Block broadcast_1_piece0 stored as bytes in memory (estimated size 2.6 KB, free 413.9 MB) | |
2018-05-16 12:04:06 INFO TorrentBroadcast:54 - Reading broadcast variable 1 took 248 ms | |
2018-05-16 12:04:06 INFO MemoryStore:54 - Block broadcast_1 stored as values in memory (estimated size 4.5 KB, free 413.9 MB) | |
2018-05-16 12:04:06 INFO WholeTextFileRDD:54 - Input split: Paths:/tmp/FolgerShakespeare/Henry_IV_Part_2.txt:0+159845,/tmp/FolgerShakespeare/The_Comedy_of_Errors.txt:0+93232,/tmp/FolgerShakespeare/Twelfth_Night.txt:0+119203,/tmp/FolgerShakespeare/Cymbeline.txt:0+169065,/tmp/FolgerShakespeare/Measure_for_Measure.txt:0+132746,/tmp/FolgerShakespeare/Romeo_and_Juliet.txt:0+146692 | |
2018-05-16 12:04:06 INFO TorrentBroadcast:54 - Started reading broadcast variable 0 | |
2018-05-16 12:04:06 INFO MemoryStore:54 - Block broadcast_0_piece0 stored as bytes in memory (estimated size 23.0 KB, free 413.9 MB) | |
2018-05-16 12:04:06 INFO TorrentBroadcast:54 - Reading broadcast variable 0 took 18 ms | |
2018-05-16 12:04:07 INFO MemoryStore:54 - Block broadcast_0 stored as values in memory (estimated size 321.3 KB, free 413.6 MB) | |
2018-05-16 12:04:07 INFO Parser:48 - source file: file:/tmp/FolgerShakespeare/Henry_IV_Part_2.txt | |
2018-05-16 12:04:08 INFO Parser:48 - source file: file:/tmp/FolgerShakespeare/The_Comedy_of_Errors.txt | |
2018-05-16 12:04:08 INFO Parser:48 - source file: file:/tmp/FolgerShakespeare/Twelfth_Night.txt | |
2018-05-16 12:04:08 INFO Parser:48 - source file: file:/tmp/FolgerShakespeare/Cymbeline.txt | |
2018-05-16 12:04:09 INFO Parser:48 - source file: file:/tmp/FolgerShakespeare/Measure_for_Measure.txt | |
2018-05-16 12:04:09 INFO Parser:48 - source file: file:/tmp/FolgerShakespeare/Romeo_and_Juliet.txt | |
2018-05-16 12:04:09 INFO Executor:54 - Finished task 0.0 in stage 0.0 (TID 0). 1196 bytes result sent to driver | |
2018-05-16 12:04:09 INFO CoarseGrainedExecutorBackend:54 - Got assigned task 3 | |
2018-05-16 12:04:09 INFO Executor:54 - Running task 3.0 in stage 0.0 (TID 3) | |
2018-05-16 12:04:09 INFO WholeTextFileRDD:54 - Input split: Paths:/tmp/FolgerShakespeare/Antony_and_Cleopatra.txt:0+156324,/tmp/FolgerShakespeare/Timon_of_Athens.txt:0+115153,/tmp/FolgerShakespeare/Much_Ado_About_Nothing.txt:0+127764,/tmp/FolgerShakespeare/The_Two_Noble_Kinsmen.txt:0+147285,/tmp/FolgerShakespeare/As_You_Like_It.txt:0+131850,/tmp/FolgerShakespeare/Hamlet.txt:0+182863 | |
2018-05-16 12:04:09 INFO Parser:48 - source file: file:/tmp/FolgerShakespeare/Antony_and_Cleopatra.txt | |
2018-05-16 12:04:10 INFO Parser:48 - source file: file:/tmp/FolgerShakespeare/Timon_of_Athens.txt | |
2018-05-16 12:04:10 INFO Parser:48 - source file: file:/tmp/FolgerShakespeare/Much_Ado_About_Nothing.txt | |
2018-05-16 12:04:10 INFO Parser:48 - source file: file:/tmp/FolgerShakespeare/The_Two_Noble_Kinsmen.txt | |
2018-05-16 12:04:10 INFO Parser:48 - source file: file:/tmp/FolgerShakespeare/As_You_Like_It.txt | |
2018-05-16 12:04:10 INFO Parser:48 - source file: file:/tmp/FolgerShakespeare/Hamlet.txt | |
2018-05-16 12:04:10 INFO Executor:54 - Finished task 3.0 in stage 0.0 (TID 3). 1153 bytes result sent to driver | |
2018-05-16 12:04:10 INFO CoarseGrainedExecutorBackend:54 - Got assigned task 5 | |
2018-05-16 12:04:10 INFO Executor:54 - Running task 5.0 in stage 0.0 (TID 5) | |
2018-05-16 12:04:10 INFO WholeTextFileRDD:54 - Input split: Paths:/tmp/FolgerShakespeare/Lucrece.txt:0+87916,/tmp/FolgerShakespeare/Richard_III.txt:0+181578,/tmp/FolgerShakespeare/The_Taming_of_the_Shrew.txt:0+130309,/tmp/FolgerShakespeare/Othello.txt:0+159401,/tmp/FolgerShakespeare/Sonnets.txt:0+99254,/tmp/FolgerShakespeare/Henry_V.txt:0+157455 | |
2018-05-16 12:04:10 INFO Parser:48 - source file: file:/tmp/FolgerShakespeare/Lucrece.txt | |
2018-05-16 12:04:11 INFO Parser:48 - source file: file:/tmp/FolgerShakespeare/Richard_III.txt | |
2018-05-16 12:04:11 INFO Parser:48 - source file: file:/tmp/FolgerShakespeare/The_Taming_of_the_Shrew.txt | |
2018-05-16 12:04:11 INFO Parser:48 - source file: file:/tmp/FolgerShakespeare/Othello.txt | |
2018-05-16 12:04:11 INFO Parser:48 - source file: file:/tmp/FolgerShakespeare/Sonnets.txt | |
2018-05-16 12:04:11 INFO Parser:48 - source file: file:/tmp/FolgerShakespeare/Henry_V.txt | |
2018-05-16 12:04:11 INFO Executor:54 - Finished task 5.0 in stage 0.0 (TID 5). 1153 bytes result sent to driver | |
2018-05-16 12:04:12 INFO CoarseGrainedExecutorBackend:54 - Got assigned task 8 | |
2018-05-16 12:04:12 INFO Executor:54 - Running task 1.0 in stage 1.0 (TID 8) | |
2018-05-16 12:04:12 INFO MapOutputTrackerWorker:54 - Updating epoch to 1 and clearing cache | |
2018-05-16 12:04:12 INFO TorrentBroadcast:54 - Started reading broadcast variable 2 | |
2018-05-16 12:04:12 INFO MemoryStore:54 - Block broadcast_2_piece0 stored as bytes in memory (estimated size 2.7 KB, free 413.6 MB) | |
2018-05-16 12:04:12 INFO TorrentBroadcast:54 - Reading broadcast variable 2 took 10 ms | |
2018-05-16 12:04:12 INFO MemoryStore:54 - Block broadcast_2 stored as values in memory (estimated size 5.0 KB, free 413.6 MB) | |
2018-05-16 12:04:12 INFO MapOutputTrackerWorker:54 - Don't have map outputs for shuffle 1, fetching them | |
2018-05-16 12:04:12 INFO MapOutputTrackerWorker:54 - Doing the fetch; tracker endpoint = NettyRpcEndpointRef(spark://MapOutputTracker@master:43871) | |
2018-05-16 12:04:12 INFO MapOutputTrackerWorker:54 - Got the output locations | |
2018-05-16 12:04:12 INFO ShuffleBlockFetcherIterator:54 - Getting 7 non-empty blocks out of 7 blocks | |
2018-05-16 12:04:12 INFO TransportClientFactory:267 - Successfully created connection to /172.30.1.61:40663 after 2 ms (0 ms spent in bootstraps) | |
2018-05-16 12:04:12 INFO ShuffleBlockFetcherIterator:54 - Started 1 remote fetches in 19 ms | |
2018-05-16 12:04:12 INFO Executor:54 - Finished task 1.0 in stage 1.0 (TID 8). 1368 bytes result sent to driver | |
2018-05-16 12:04:12 INFO CoarseGrainedExecutorBackend:54 - Got assigned task 9 | |
2018-05-16 12:04:12 INFO Executor:54 - Running task 2.0 in stage 1.0 (TID 9) | |
2018-05-16 12:04:12 INFO ShuffleBlockFetcherIterator:54 - Getting 7 non-empty blocks out of 7 blocks | |
2018-05-16 12:04:12 INFO ShuffleBlockFetcherIterator:54 - Started 1 remote fetches in 10 ms | |
2018-05-16 12:04:12 INFO Executor:54 - Finished task 2.0 in stage 1.0 (TID 9). 1368 bytes result sent to driver | |
2018-05-16 12:04:12 INFO CoarseGrainedExecutorBackend:54 - Got assigned task 11 | |
2018-05-16 12:04:12 INFO Executor:54 - Running task 4.0 in stage 1.0 (TID 11) | |
2018-05-16 12:04:12 INFO ShuffleBlockFetcherIterator:54 - Getting 7 non-empty blocks out of 7 blocks | |
2018-05-16 12:04:12 INFO ShuffleBlockFetcherIterator:54 - Started 1 remote fetches in 2 ms | |
2018-05-16 12:04:13 INFO Executor:54 - Finished task 4.0 in stage 1.0 (TID 11). 1325 bytes result sent to driver | |
2018-05-16 12:04:13 INFO CoarseGrainedExecutorBackend:54 - Got assigned task 15 | |
2018-05-16 12:04:13 INFO Executor:54 - Running task 1.0 in stage 2.0 (TID 15) | |
2018-05-16 12:04:13 INFO MapOutputTrackerWorker:54 - Updating epoch to 2 and clearing cache | |
2018-05-16 12:04:13 INFO TorrentBroadcast:54 - Started reading broadcast variable 3 | |
2018-05-16 12:04:13 INFO MemoryStore:54 - Block broadcast_3_piece0 stored as bytes in memory (estimated size 3.2 KB, free 413.6 MB) | |
2018-05-16 12:04:13 INFO TorrentBroadcast:54 - Reading broadcast variable 3 took 13 ms | |
2018-05-16 12:04:13 INFO MemoryStore:54 - Block broadcast_3 stored as values in memory (estimated size 6.3 KB, free 413.6 MB) | |
2018-05-16 12:04:13 INFO MapOutputTrackerWorker:54 - Don't have map outputs for shuffle 0, fetching them | |
2018-05-16 12:04:13 INFO MapOutputTrackerWorker:54 - Doing the fetch; tracker endpoint = NettyRpcEndpointRef(spark://MapOutputTracker@master:43871) | |
2018-05-16 12:04:13 INFO MapOutputTrackerWorker:54 - Got the output locations | |
2018-05-16 12:04:13 INFO ShuffleBlockFetcherIterator:54 - Getting 7 non-empty blocks out of 7 blocks | |
2018-05-16 12:04:13 INFO ShuffleBlockFetcherIterator:54 - Started 1 remote fetches in 5 ms | |
2018-05-16 12:04:13 INFO Executor:54 - Finished task 1.0 in stage 2.0 (TID 15). 2115 bytes result sent to driver | |
2018-05-16 12:04:13 INFO CoarseGrainedExecutorBackend:54 - Got assigned task 17 | |
2018-05-16 12:04:13 INFO Executor:54 - Running task 3.0 in stage 2.0 (TID 17) | |
2018-05-16 12:04:13 INFO ShuffleBlockFetcherIterator:54 - Getting 7 non-empty blocks out of 7 blocks | |
2018-05-16 12:04:13 INFO ShuffleBlockFetcherIterator:54 - Started 1 remote fetches in 6 ms | |
2018-05-16 12:04:13 INFO Executor:54 - Finished task 3.0 in stage 2.0 (TID 17). 2062 bytes result sent to driver | |
2018-05-16 12:04:13 INFO CoarseGrainedExecutorBackend:54 - Got assigned task 19 | |
2018-05-16 12:04:13 INFO Executor:54 - Running task 5.0 in stage 2.0 (TID 19) | |
2018-05-16 12:04:13 INFO ShuffleBlockFetcherIterator:54 - Getting 7 non-empty blocks out of 7 blocks | |
2018-05-16 12:04:13 INFO ShuffleBlockFetcherIterator:54 - Started 1 remote fetches in 6 ms | |
2018-05-16 12:04:13 INFO Executor:54 - Finished task 5.0 in stage 2.0 (TID 19). 2088 bytes result sent to driver | |
2018-05-16 12:04:13 INFO CoarseGrainedExecutorBackend:54 - Got assigned task 21 | |
2018-05-16 12:04:13 INFO Executor:54 - Running task 0.0 in stage 5.0 (TID 21) | |
2018-05-16 12:04:13 INFO TorrentBroadcast:54 - Started reading broadcast variable 4 | |
2018-05-16 12:04:13 INFO MemoryStore:54 - Block broadcast_4_piece0 stored as bytes in memory (estimated size 3.4 KB, free 413.6 MB) | |
2018-05-16 12:04:13 INFO TorrentBroadcast:54 - Reading broadcast variable 4 took 42 ms | |
2018-05-16 12:04:13 INFO MemoryStore:54 - Block broadcast_4 stored as values in memory (estimated size 6.4 KB, free 413.6 MB) | |
2018-05-16 12:04:13 INFO ShuffleBlockFetcherIterator:54 - Getting 7 non-empty blocks out of 7 blocks | |
2018-05-16 12:04:13 INFO ShuffleBlockFetcherIterator:54 - Started 1 remote fetches in 8 ms | |
2018-05-16 12:04:14 INFO Executor:54 - Finished task 0.0 in stage 5.0 (TID 21). 1325 bytes result sent to driver | |
2018-05-16 12:04:14 INFO CoarseGrainedExecutorBackend:54 - Got assigned task 23 | |
2018-05-16 12:04:14 INFO Executor:54 - Running task 2.0 in stage 5.0 (TID 23) | |
2018-05-16 12:04:14 INFO ShuffleBlockFetcherIterator:54 - Getting 7 non-empty blocks out of 7 blocks | |
2018-05-16 12:04:14 INFO ShuffleBlockFetcherIterator:54 - Started 1 remote fetches in 9 ms | |
2018-05-16 12:04:14 INFO Executor:54 - Finished task 2.0 in stage 5.0 (TID 23). 1368 bytes result sent to driver | |
2018-05-16 12:04:14 INFO CoarseGrainedExecutorBackend:54 - Got assigned task 25 | |
2018-05-16 12:04:14 INFO Executor:54 - Running task 4.0 in stage 5.0 (TID 25) | |
2018-05-16 12:04:14 INFO ShuffleBlockFetcherIterator:54 - Getting 7 non-empty blocks out of 7 blocks | |
2018-05-16 12:04:14 INFO ShuffleBlockFetcherIterator:54 - Started 1 remote fetches in 2 ms | |
2018-05-16 12:04:14 INFO Executor:54 - Finished task 4.0 in stage 5.0 (TID 25). 1325 bytes result sent to driver | |
2018-05-16 12:04:14 INFO CoarseGrainedExecutorBackend:54 - Got assigned task 27 | |
2018-05-16 12:04:14 INFO Executor:54 - Running task 6.0 in stage 5.0 (TID 27) | |
2018-05-16 12:04:14 INFO ShuffleBlockFetcherIterator:54 - Getting 7 non-empty blocks out of 7 blocks | |
2018-05-16 12:04:14 INFO ShuffleBlockFetcherIterator:54 - Started 1 remote fetches in 3 ms | |
2018-05-16 12:04:14 INFO Executor:54 - Finished task 6.0 in stage 5.0 (TID 27). 1325 bytes result sent to driver | |
2018-05-16 12:04:14 INFO CoarseGrainedExecutorBackend:54 - Got assigned task 28 | |
2018-05-16 12:04:14 INFO Executor:54 - Running task 0.0 in stage 6.0 (TID 28) | |
2018-05-16 12:04:14 INFO MapOutputTrackerWorker:54 - Updating epoch to 3 and clearing cache | |
2018-05-16 12:04:14 INFO TorrentBroadcast:54 - Started reading broadcast variable 5 | |
2018-05-16 12:04:14 INFO MemoryStore:54 - Block broadcast_5_piece0 stored as bytes in memory (estimated size 2.1 KB, free 413.6 MB) | |
2018-05-16 12:04:14 INFO TorrentBroadcast:54 - Reading broadcast variable 5 took 10 ms | |
2018-05-16 12:04:14 INFO MemoryStore:54 - Block broadcast_5 stored as values in memory (estimated size 3.5 KB, free 413.6 MB) | |
2018-05-16 12:04:14 INFO MapOutputTrackerWorker:54 - Don't have map outputs for shuffle 2, fetching them | |
2018-05-16 12:04:14 INFO MapOutputTrackerWorker:54 - Doing the fetch; tracker endpoint = NettyRpcEndpointRef(spark://MapOutputTracker@master:43871) | |
2018-05-16 12:04:14 INFO MapOutputTrackerWorker:54 - Got the output locations | |
2018-05-16 12:04:14 INFO ShuffleBlockFetcherIterator:54 - Getting 7 non-empty blocks out of 7 blocks | |
2018-05-16 12:04:14 INFO ShuffleBlockFetcherIterator:54 - Started 1 remote fetches in 1 ms | |
2018-05-16 12:04:14 INFO Executor:54 - Finished task 0.0 in stage 6.0 (TID 28). 418968 bytes result sent to driver | |
2018-05-16 12:04:14 INFO CoarseGrainedExecutorBackend:54 - Got assigned task 30 | |
2018-05-16 12:04:14 INFO Executor:54 - Running task 2.0 in stage 6.0 (TID 30) | |
2018-05-16 12:04:14 INFO ShuffleBlockFetcherIterator:54 - Getting 7 non-empty blocks out of 7 blocks | |
2018-05-16 12:04:14 INFO ShuffleBlockFetcherIterator:54 - Started 1 remote fetches in 1 ms | |
2018-05-16 12:04:15 INFO Executor:54 - Finished task 2.0 in stage 6.0 (TID 30). 564993 bytes result sent to driver | |
2018-05-16 12:04:15 INFO CoarseGrainedExecutorBackend:54 - Got assigned task 32 | |
2018-05-16 12:04:15 INFO Executor:54 - Running task 4.0 in stage 6.0 (TID 32) | |
2018-05-16 12:04:15 INFO ShuffleBlockFetcherIterator:54 - Getting 7 non-empty blocks out of 7 blocks | |
2018-05-16 12:04:15 INFO ShuffleBlockFetcherIterator:54 - Started 1 remote fetches in 3 ms | |
2018-05-16 12:04:15 INFO Executor:54 - Finished task 4.0 in stage 6.0 (TID 32). 549159 bytes result sent to driver | |
2018-05-16 12:04:15 INFO CoarseGrainedExecutorBackend:54 - Got assigned task 34 | |
2018-05-16 12:04:15 INFO Executor:54 - Running task 6.0 in stage 6.0 (TID 34) | |
2018-05-16 12:04:15 INFO ShuffleBlockFetcherIterator:54 - Getting 7 non-empty blocks out of 7 blocks | |
2018-05-16 12:04:15 INFO ShuffleBlockFetcherIterator:54 - Started 1 remote fetches in 2 ms | |
2018-05-16 12:04:15 INFO Executor:54 - Finished task 6.0 in stage 6.0 (TID 34). 706956 bytes result sent to driver | |
2018-05-16 12:04:16 INFO CoarseGrainedExecutorBackend:54 - Driver commanded a shutdown | |
2018-05-16 12:04:16 ERROR CoarseGrainedExecutorBackend:43 - RECEIVED SIGNAL TERM | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Setup machine:
Add /etc/hosts entries for master and workers, then replace Java 7 with Java 8:
sudo yum remove java-1.7.0-openjdk.x86_64
sudo yum install java-1.8.0-openjdk-devel.x86_64
Download dependencies:
aws s3 cp --recursive s3://com-kdgregory-transfer/FolgerShakespeare/ /tmp
aws s3 cp s3://com-kdgregory-transfer/spark-2.3.0-bin-hadoop2.7.tgz /tmp
aws s3 cp s3://com-kdgregory-transfer/logging-dependencies-spark.tgz /tmp
Install Spark:
cd
tar zxvf /tmp/spark-2.3.0-bin-hadoop2.7.tgz
cd spark-2.3.0-bin-hadoop2.7/
Update logging configuration:
pushd jars/
tar zxvf /tmp/logging-dependencies-spark.tgz
popd
cp conf/log4j.properties.template conf/log4j.properties
vi conf/log4j.properties
Apply the changes from log4j.properties.update, and update the rootLogger setting.
Run Master:
sbin/start-master.sh
tail -f logs/spark-ec2-user-org.apache.spark.deploy.master.*.out
Run Workers:
sbin/start-slave.sh spark://master:7077
tail -f logs/spark-ec2-user-org.apache.spark.deploy.worker.*.out