Skip to content

Instantly share code, notes, and snippets.

View dimaspivak's full-sized avatar

Dima Spivak dimaspivak

View GitHub Profile
# Data Collector version shared by every image reference below. Declared before
# the first FROM so it can be used in the FROM lines themselves.
ARG SDC_VERSION=3.17.0

# Set up stage libs here. Naming allows you to refer to them by name instead of by integer.
FROM streamsets/datacollector-libs:streamsets-datacollector-jdbc-lib-${SDC_VERSION} AS jdbc-lib

FROM streamsets/datacollector:${SDC_VERSION}

# An ARG declared before the first FROM is only visible to FROM instructions, and an
# ARG declared inside a stage goes out of scope at the next FROM. Redeclare
# SDC_VERSION (without a value, to inherit the default above) and define the
# directory ARG in the stage that actually uses them.
ARG SDC_VERSION

# Using build args for the directories just makes things easier to read. These can be found in the image layers
# page on Docker Hub.
ARG JDBC_LIB_DIR=/opt/streamsets-datacollector-${SDC_VERSION}/streamsets-libs/streamsets-datacollector-jdbc-lib

# Copy the stage library out of the named stage into the same path in the final image.
COPY --from=jdbc-lib ${JDBC_LIB_DIR} ${JDBC_LIB_DIR}
from streamsets.sdk import DataCollector
# Connection settings (presumably for the Data Collector instance this script
# talks to -- confirm against the code that consumes them).
SDC_URL = "localhost:18630"
SDC_USERNAME = "admin"
SDC_PASSWORD = "admin"

# Stage identifier and the two stage-library names (legacy and new) used by the
# rest of the script.
STAGE_NAME = "com_streamsets_pipeline_stage_destination_mapreduce_MapReduceDExecutor"
LEGACY_LIBRARY = "streamsets-datacollector-cdh_5_12-lib"
NEW_LIBRARY = "streamsets-datacollector-cdh_5_15-lib"
from streamsets.sdk import DataCollector
# Connection settings (presumably for the Data Collector instance this script
# talks to -- confirm against the code that consumes them).
SDC_URL = "localhost:18630"
SDC_USERNAME = "admin"
SDC_PASSWORD = "admin"

# Stage identifier and the two stage-library names (legacy and new) used by the
# rest of the script.
STAGE_NAME = "com_streamsets_pipeline_stage_destination_mapreduce_MapReduceDExecutor"
LEGACY_LIBRARY = "streamsets-datacollector-cdh_5_12-lib"
NEW_LIBRARY = "streamsets-datacollector-cdh_5_15-lib"
import pytest
@pytest.mark.parametrize('data_format', ['DELIMITED'])
@pytest.mark.parametrize('header_line', ['WITH_HEADER'])
@pytest.mark.parametrize('config_value', ['False', 'True'])
def test_directory_origin_config_allow_extra_columns(sdc_builder, sdc_executor, config_value, data_format, header_line):
    """Placeholder test, parametrized over ``data_format``, ``header_line`` and ``config_value``.

    The body is not implemented yet; each parameter combination currently passes trivially.
    ``sdc_builder`` and ``sdc_executor`` are presumably pytest fixtures supplied elsewhere.
    """
    # NOTE(review): config_value is parametrized with the strings 'False'/'True',
    # not booleans -- confirm this is intended before implementing the test.
    pass
import pytest
@pytest.mark.parametrize('data_format', ['DELIMITED'])
@pytest.mark.parametrize('header_line', ['WITH_HEADER'])
@pytest.mark.parametrize('config_value', ['False', 'True'])
def test_directory_origin_config_allow_extra_columns(sdc_builder, sdc_executor, config_value, data_format, header_line):
    """Placeholder test, parametrized over ``data_format``, ``header_line`` and ``config_value``.

    The body is not implemented yet; each parameter combination currently passes trivially.
    ``sdc_builder`` and ``sdc_executor`` are presumably pytest fixtures supplied elsewhere.
    """
    # NOTE(review): config_value is parametrized with the strings 'False'/'True',
    # not booleans -- confirm this is intended before implementing the test.
    pass
This file has been truncated, but you can view the full file.
2018-12-21 01:34:12,512 [user:] [pipeline:] [runner:] [thread:main] INFO Main - -----------------------------------------------------------------
2018-12-21 01:34:12,513 [user:] [pipeline:] [runner:] [thread:main] INFO Main - Build info:
2018-12-21 01:34:12,513 [user:] [pipeline:] [runner:] [thread:main] INFO Main - Version : 3.7.0
2018-12-21 01:34:12,514 [user:] [pipeline:] [runner:] [thread:main] INFO Main - Date : 2018-12-20T22:08Z
2018-12-21 01:34:12,514 [user:] [pipeline:] [runner:] [thread:main] INFO Main - Built by : ubuntu
2018-12-21 01:34:12,514 [user:] [pipeline:] [runner:] [thread:main] INFO Main - Repo SHA : 6da8299871663ddf58b033b092b2544da49d2b2d
2018-12-21 01:34:12,514 [user:] [pipeline:] [runner:] [thread:main] INFO Main - Source MD5 : ca4c16c35ae9d9ab4fc8e38c20983077
2018-12-21 01:34:12,514 [user:] [pipeline:] [runner:] [thread:main] INFO Main - -----------------------------------------------------------------
2018-12-21 01:34:12,514 [user:]
@dimaspivak
dimaspivak / gist:9b17b8192b2ca7557daff0ce20967c0c
Created December 19, 2018 00:42
test_kafka_destination.py::test_kafka_error_destination
2018-12-18 22:47:24,126 [user:*admin] [pipeline:To Error Kafka/ToErrorKafkaf5f4826a-dc20-400e-9fba-0fc7421e7256] [runner:] [thread:ProductionPipelineRunnable-ToErrorKafkaf5f4826a-dc20-400e-9fba-0fc7421e7256-To Error Kafka] INFO AppInfoParser - Kafka version : 0.10.0-kafka-2.1.1
2018-12-18 22:47:24,126 [user:*admin] [pipeline:To Error Kafka/ToErrorKafkaf5f4826a-dc20-400e-9fba-0fc7421e7256] [runner:] [thread:ProductionPipelineRunnable-ToErrorKafkaf5f4826a-dc20-400e-9fba-0fc7421e7256-To Error Kafka] INFO AppInfoParser - Kafka commitId : unknown
2018-12-18 22:47:31,266 [user:*admin] [pipeline:To Error Kafka/ToErrorKafkaf5f4826a-dc20-400e-9fba-0fc7421e7256] [runner:] [thread:ProductionPipelineRunnable-ToErrorKafkaf5f4826a-dc20-400e-9fba-0fc7421e7256-To Error Kafka] ERROR ClientUtils - Failed to close consumer metrics
java.lang.StackOverflowError
at java.io.ExpiringCache.get(ExpiringCache.java:78)
at java.io.UnixFileSystem.canonicalize(UnixFileSystem.java:152)
at java.io.File.getCanonicalPath(File.java:618)
a
# Install clusterdock (provides the `clusterdock` command used below).
pip3 install clusterdock
# Clone the Apache Kafka topology for clusterdock. This creates a
# ./topology_apache_kafka directory in the current working directory.
git clone https://github.com/clusterdock/topology_apache_kafka.git
# Start Apache Kafka (defaults to node-1.cluster, node-2.cluster, node-3.cluster)
# with three brokers on Kafka 1.0.0. The topology is referenced by the name of the
# directory cloned above -- presumably resolved relative to the current directory,
# so run this from where the clone was made.
clusterdock -v start topology_apache_kafka --brokers node-1 node-2 node-3 --kafka-version 1.0.0
# Start StreamSets Data Collector on the same cluster network.
2016-10-04 15:31:21,747 [tserver.TabletServer] INFO : Started replication service on node-8.cluster:10002
2016-10-04 15:31:22,711 [Audit ] INFO : operation: permitted; user: root; client: 192.168.124.8:48556; action: authenticate;
2016-10-04 15:31:22,714 [Audit ] INFO : operation: permitted; user: root; client: 192.168.124.8:48556; action: performSystemAction; principal: root;
2016-10-04 15:31:23,268 [watcher.MonitorLog4jWatcher] INFO : Changing monitor log4j address to node-7.cluster:4560
2016-10-04 15:31:23,268 [watcher.MonitorLog4jWatcher] INFO : Enabled log-forwarding
2016-10-04 15:31:26,274 [tserver.TabletServer] INFO : Loading tablet +r<<
2016-10-04 15:31:26,277 [tserver.TabletServer] INFO : node-8.cluster:10011: got assignment from master: +r<<
2016-10-04 15:31:26,323 [conf.ConfigSanityCheck] WARN : Use of instance.dfs.uri and instance.dfs.dir are deprecated. Consider using instance.volumes instead.
2016-10-04 15:31:26,574 [conf.ConfigSanityCheck] WARN : Use of instance.dfs.uri and instance.dfs.dir
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0