Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save vijaypolsani/e053c9d0589ca5dbb452 to your computer and use it in GitHub Desktop.
Save vijaypolsani/e053c9d0589ca5dbb452 to your computer and use it in GitHub Desktop.
streamset-cookbooks-solo
.vagrant
# Bundler
bin/*
.bundle/*
.kitchen/
.kitchen.local.yml
Berksfile.lock
streamsets-cookbooks-8180c47faf1e68e4ff921460d84f281b833d14b4
<?xml version="1.0" encoding="UTF-8"?>
<!-- IDE project settings file holding the "CompilerConfiguration" component. -->
<project version="4">
<component name="CompilerConfiguration">
<!-- The default compiler is set explicitly to Javac. -->
<option name="DEFAULT_COMPILER" value="Javac" />
<resourceExtensions />
<!-- One "!?*.ext" wildcard entry per source-file extension (java, form, class, groovy, scala, flex, kt, clj). -->
<wildcardResourcePatterns>
<entry name="!?*.java" />
<entry name="!?*.form" />
<entry name="!?*.class" />
<entry name="!?*.groovy" />
<entry name="!?*.scala" />
<entry name="!?*.flex" />
<entry name="!?*.kt" />
<entry name="!?*.clj" />
</wildcardResourcePatterns>
<!-- A single default annotation-processing profile; it is disabled (enabled="false"). -->
<annotationProcessing>
<profile default="true" name="Default" enabled="false">
<processorPath useClasspath="true" />
</profile>
</annotationProcessing>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<!-- IDE project settings file holding the "Encoding" component: UTF guessing is on, native2ascii conversion for properties files is off. -->
<project version="4">
<component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<option name="DEFAULT_COMPILER" />
<resourceExtensions />
<wildcardResourcePatterns>
<entry name="!?*.java" />
<entry name="!?*.form" />
<entry name="!?*.class" />
<entry name="!?*.groovy" />
<entry name="!?*.scala" />
<entry name="!?*.flex" />
<entry name="!?*.kt" />
<entry name="!?*.clj" />
</wildcardResourcePatterns>
<annotationProcessing>
<profile default="true" name="Default" enabled="false">
<processorPath useClasspath="true" />
</profile>
</annotationProcessing>
</component>
<component name="CopyrightManager" default="" />
<component name="DaemonCodeAnalyzer">
<disable_hints />
</component>
<component name="DependencyValidationManager">
<option name="SKIP_IMPORT_STATEMENTS" value="false" />
</component>
<component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
<component name="GradleLocalSettings">
<option name="modificationStamps">
<map>
<entry key="$USER_HOME$/_intellij/spring-integration" value="5724149136000" />
</map>
</option>
</component>
<component name="IdProvider" IDEtalkID="40B324CED0BE1E1ACCA77BAA4BD671DE" />
<component name="ProjectInspectionProfilesVisibleTreeState">
<entry key="Project Default">
<profile-state>
<expanded-state>
<State>
<id />
</State>
<State>
<id>Android Lint</id>
</State>
<State>
<id>Class structure</id>
</State>
<State>
<id>Control flow issues</id>
</State>
<State>
<id>Error handling</id>
</State>
<State>
<id>Probable bugs</id>
</State>
</expanded-state>
<selected-state>
<State>
<id>Abstraction issues</id>
</State>
</selected-state>
</profile-state>
</entry>
</component>
<component name="ProjectLevelVcsManager" settingsEditedManually="false">
<OptionsSetting value="true" id="Add" />
<OptionsSetting value="true" id="Remove" />
<OptionsSetting value="true" id="Checkout" />
<OptionsSetting value="true" id="Update" />
<OptionsSetting value="true" id="Status" />
<OptionsSetting value="true" id="Edit" />
<ConfirmationsSetting value="0" id="Add" />
<ConfirmationsSetting value="0" id="Remove" />
</component>
<component name="ProjectModuleManager">
<modules />
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" assert-keyword="true" jdk-15="true" project-jdk-name="1.8" project-jdk-type="JavaSDK" />
<component name="PropertiesComponent">
<property name="GoToClass.includeLibraries" value="false" />
<property name="GoToClass.toSaveIncludeLibraries" value="false" />
<property name="GoToFile.includeJavaFiles" value="false" />
<property name="MemberChooser.sorted" value="false" />
<property name="MemberChooser.showClasses" value="true" />
<property name="MemberChooser.copyJavadoc" value="false" />
<property name="project.structure.last.edited" value="SDKs" />
<property name="project.structure.proportion" value="0.0" />
<property name="project.structure.side.proportion" value="0.2" />
<property name="options.lastSelected" value="vcs.Subversion" />
<property name="options.splitter.main.proportions" value="0.3" />
<property name="options.splitter.details.proportions" value="0.2" />
<property name="options.searchVisible" value="true" />
<property name="last_opened_file_path" value="$USER_HOME$/_intellij/lia" />
</component>
<component name="PyConsoleOptionsProvider">
<option name="myPythonConsoleState">
<console-settings />
</option>
</component>
<component name="RunManager">
<configuration default="true" type="#org.jetbrains.idea.devkit.run.PluginConfigurationType" factoryName="Plugin">
<module name="" />
<option name="VM_PARAMETERS" value="-Xmx512m -Xms256m -XX:MaxPermSize=250m -ea" />
<option name="PROGRAM_PARAMETERS" />
<method />
</configuration>
<configuration default="true" type="Remote" factoryName="Remote">
<option name="USE_SOCKET_TRANSPORT" value="true" />
<option name="SERVER_MODE" value="false" />
<option name="SHMEM_ADDRESS" value="javadebug" />
<option name="HOST" value="localhost" />
<option name="PORT" value="5005" />
<method />
</configuration>
<configuration default="true" type="Applet" factoryName="Applet">
<module name="" />
<option name="MAIN_CLASS_NAME" />
<option name="HTML_FILE_NAME" />
<option name="HTML_USED" value="false" />
<option name="WIDTH" value="400" />
<option name="HEIGHT" value="300" />
<option name="POLICY_FILE" value="$APPLICATION_HOME_DIR$/bin/appletviewer.policy" />
<option name="VM_PARAMETERS" />
<option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" />
<option name="ALTERNATIVE_JRE_PATH" />
<method />
</configuration>
<configuration default="true" type="TestNG" factoryName="TestNG">
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
<module name="" />
<option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" />
<option name="ALTERNATIVE_JRE_PATH" />
<option name="SUITE_NAME" />
<option name="PACKAGE_NAME" />
<option name="MAIN_CLASS_NAME" />
<option name="METHOD_NAME" />
<option name="GROUP_NAME" />
<option name="TEST_OBJECT" value="CLASS" />
<option name="VM_PARAMETERS" value="-ea" />
<option name="PARAMETERS" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="OUTPUT_DIRECTORY" />
<option name="ANNOTATION_TYPE" />
<option name="ENV_VARIABLES" />
<option name="PASS_PARENT_ENVS" value="true" />
<option name="TEST_SEARCH_SCOPE">
<value defaultName="moduleWithDependencies" />
</option>
<option name="USE_DEFAULT_REPORTERS" value="false" />
<option name="PROPERTIES_FILE" />
<envs />
<properties />
<listeners />
<method />
</configuration>
<configuration default="true" type="Application" factoryName="Application">
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
<option name="MAIN_CLASS_NAME" />
<option name="VM_PARAMETERS" />
<option name="PROGRAM_PARAMETERS" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" />
<option name="ALTERNATIVE_JRE_PATH" />
<option name="ENABLE_SWING_INSPECTOR" value="false" />
<option name="ENV_VARIABLES" />
<option name="PASS_PARENT_ENVS" value="true" />
<module name="" />
<envs />
<method />
</configuration>
<configuration default="true" type="JUnit" factoryName="JUnit">
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
<module name="" />
<option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" />
<option name="ALTERNATIVE_JRE_PATH" />
<option name="PACKAGE_NAME" />
<option name="MAIN_CLASS_NAME" />
<option name="METHOD_NAME" />
<option name="TEST_OBJECT" value="class" />
<option name="VM_PARAMETERS" value="-ea" />
<option name="PARAMETERS" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="ENV_VARIABLES" />
<option name="PASS_PARENT_ENVS" value="true" />
<option name="TEST_SEARCH_SCOPE">
<value defaultName="moduleWithDependencies" />
</option>
<envs />
<patterns />
<method />
</configuration>
<list size="0" />
<configuration name="&lt;template&gt;" type="WebApp" default="true" selected="false">
<Host>localhost</Host>
<Port>5050</Port>
</configuration>
</component>
<component name="SvnConfiguration" myUseAcceleration="nothing">
<configuration useDefault="false">$USER_HOME$/.subversion</configuration>
</component>
<component name="VcsContentAnnotationSettings">
<option name="myLimit" value="2678400000" />
</component>
<component name="VcsManagerConfiguration">
<option name="myTodoPanelSettings">
<TodoPanelSettings />
</option>
</component>
<component name="masterDetails">
<states>
<state key="GlobalLibrariesConfigurable.UI">
<settings>
<splitter-proportions>
<option name="proportions">
<list>
<option value="0.2" />
</list>
</option>
</splitter-proportions>
</settings>
</state>
<state key="JdkListConfigurable.UI">
<settings>
<last-edited>1.8</last-edited>
<splitter-proportions>
<option name="proportions">
<list>
<option value="0.2" />
</list>
</option>
</splitter-proportions>
</settings>
</state>
<state key="ProjectJDKs.UI">
<settings>
<last-edited>1.8</last-edited>
<splitter-proportions>
<option name="proportions">
<list>
<option value="0.2" />
</list>
</option>
</splitter-proportions>
</settings>
</state>
<state key="ProjectLibrariesConfigurable.UI">
<settings>
<splitter-proportions>
<option name="proportions">
<list>
<option value="0.2" />
</list>
</option>
</splitter-proportions>
</settings>
</state>
</states>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<!-- IDE module registry: lists the module definition files that belong to this project. -->
<project version="4">
<component name="ProjectModuleManager">
<modules>
<!-- The only registered module; both attributes point at the same .iml file under $PROJECT_DIR$/.idea. -->
<module fileurl="file://$PROJECT_DIR$/.idea/streamsets-cookbooks-8180c47faf1e68e4ff921460d84f281b833d14b4.iml" filepath="$PROJECT_DIR$/.idea/streamsets-cookbooks-8180c47faf1e68e4ff921460d84f281b833d14b4.iml" />
</modules>
</component>
</project>
<!-- Stand-alone "DependencyValidationManager" component fragment (no XML declaration or project wrapper in this file). -->
<component name="DependencyValidationManager">
<state>
<!-- SKIP_IMPORT_STATEMENTS is explicitly set to false. -->
<option name="SKIP_IMPORT_STATEMENTS" value="false" />
</state>
</component>
<?xml version="1.0" encoding="UTF-8"?>
<!-- IDE module definition (type JAVA_MODULE) for the cookbook checkout. -->
<module type="JAVA_MODULE" version="4">
<!-- A single JRuby facet with an empty SDK name and no load-path or i18n folders. -->
<component name="FacetManager">
<facet type="JRUBY" name="JRuby">
<configuration>
<JRUBY_FACET_CONFIG_ID NAME="JRUBY_SDK_NAME" VALUE="" />
<LOAD_PATH number="0" />
<I18N_FOLDERS number="0" />
</configuration>
</facet>
</component>
<!-- Module roots: the content root is the module directory itself; the JDK is inherited and compiler output settings are inherited too. -->
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<!-- Template folders: the streamsets cookbook's templates directory. -->
<component name="TemplatesService">
<option name="TEMPLATE_FOLDERS">
<list>
<option value="$MODULE_DIR$/cookbooks/streamsets/templates" />
</list>
</option>
</component>
</module>
<?xml version="1.0" encoding="UTF-8"?>
<!-- IDE version-control mappings for the project. -->
<project version="4">
<component name="VcsDirectoryMappings">
<!-- Single mapping whose directory and vcs attributes are both empty. -->
<mapping directory="" vcs="" />
</component>
</project>
/Users/vijay.polsani/.berkshelf/vagrant-berkshelf/shelves/berkshelf20150820-13284-1ep28t4-sdc
-----BEGIN RSA PRIVATE KEY-----
MIIEpAIBAAKCAQEAxSo/6f6oMAiWUdJi8Tw/fsPeLbsyrxeFh3llmA9OKE2fFJWK
NOm3xiCHXFlLTRpi9FU28bRvARQ6lerYdzjcgWU4B+/LxfDHRE+ExBczzQpQSQ4k
xtX0asORzFuirkS8WUl7w7Flhe1szPf/4OinqWH8Cpt6127cWQBLwgFRXpHcDEyj
i3YkJOJ9GK7H31qoykR0UVD6TA97/xCqB97vQnBlcl0UvVlV0DrgYYU5fjh04BXI
pLvtBCNKfNy2Kamnwb4xjoCCm1AWlE7F27yd5oC84495SPiyeHymXkfisjRFGxl1
GS3jtNoK4pwE15nKR1nSK7RXixRVWyZ4Wsup7wIDAQABAoIBADgJE/Uq/R35ss86
Xgh1uQ4MGN8ibvs4Uij4pZkb8Hif5vBwt8DmE+bpY88pSa92Kl2KwnzbbRj/aM8m
2bK4pGLaI5yN2Ho/PkL53p/kgEJsBfldXbJtKHQXdR6BhLDGtEkkC1rI1jPEEMII
Ud7YAJ6xTHKvUbBcv59D6EQRH9EiX4d8Lr7gHBkSeS2QJRafyATvi0NgeE3XyYFg
igOuHNQn8iVV+bYSAjpkPfoH+GPXNqb0M+Sfk0uIjAvlu2TLnP/BZgNNjORzNe3/
tvm00kSW3iENq8vuOP6B5QVhRYPEA1gBnzQM1i3m/orovHbu4nkYANaFPWOyDF3X
hJz5oOECgYEA513ejYnzbQP69rSRGkcB0CiMmfW8xBtYZLBbfeS9MdB/1V4Yt0dT
zGG4CoSfoLmGBQOGl/pBc0q19qD3eI/KE2O9GcU4yBeVnpxufmxnokJgXucTm27D
56D13iSh0lpDyWZEzNkzmupaFtWn1fdfAA9NgWmDc9Tj/JkiD1nLX9cCgYEA2igu
LBCI+CZk7IKcJL87NG7s7jEV4Q3i1Yn/7ADvNZDhjMpZUwrSC/8RACnyntiMlB3V
KgCbaGQwAbCarJea/6nP3UDxhad9TTUD6w3cViQm7U/qAE0Gdirb+a/YU/Nrq9ag
0yn1eIA1unavMhi6NQMLN4RXQ75VSlP0fPQpI6kCgYEArknXMfY+itvZvOjO+eWN
pxo0uHIWbgBPeSIk+dRK9XCkK75DjiVMKgVe14WKnk/DdpluVaQuk8QvCQe/BHIE
/tA+jmA0Yv7pNd+PmOresquf3z3hVlB9gUqOjHY59LUMpXXF1C62ojdyAAuY8CT1
ypy8fQVYAfTYfdt5fejdvGcCgYEAx3WVQZScpsXz4IpKWQ6/AuGont8iMfKQQs05
wH3aOA+PKTqZEOQDyBQdZxjzS2iSn5nnG9ASRw/TFLhzsvmqLbcE5jZKnPjWB5FB
U9gIbHoBXViDimLiVYg6N8Ae+iLFfwdtY6QpW1NdzG5XIGabWXy+9LN7VeTf9mVs
VBKoyoECgYBzpKl397d4qHBIQUb7VeYxUdphxKSDfLwcC3Z7KAcTJE2bqVak6R3M
QTI6dsI6KVrdOAXgLFk+cHpm/Uak26icbTVjqOmZ0SYRg7XJVvkK2gPdIN035tQ8
aifqWyrrCYNZBBhdfjQEwvvjd5euxwvmPo2RwUmgnGDIAiAsO6c7nw==
-----END RSA PRIVATE KEY-----
{"virtualbox":{"/tmp":{"guestpath":"/tmp","hostpath":"/Users/vijay.polsani/Downloads","disabled":false},"/vagrant":{"guestpath":"/vagrant","hostpath":"/Users/vijay.polsani/Downloads/streamsets-cookbooks-8180c47faf1e68e4ff921460d84f281b833d14b4/cookbooks/streamsets","disabled":false},"v-csc-404a71c01":{"id":"v-csc-404a71c01","guestpath":"/tmp/vagrant-chef/8229a375f00b3f8e243d434f6d763455/cookbooks","hostpath":"/Users/vijay.polsani/.berkshelf/vagrant-berkshelf/shelves/berkshelf20150820-13284-1ep28t4-sdc","disabled":false}}}
# Default attributes for the streamsets cookbook.

# Java settings: Oracle install flavor, JDK version 7, Oracle download terms accepted.
default['java']['install_flavor'] = 'oracle'
default['java']['jdk_version'] = '7'
default['java']['oracle']['accept_oracle_download_terms'] = true
# Package name and version of the Data Collector and of its Apache Kafka stage library.
default['streamsets']['streamsets']['package_name'] = 'streamsets-datacollector'
default['streamsets']['streamsets']['version'] = '1.0.0b2-1'
default['streamsets']['apache-kafka']['package_name'] = 'streamsets-datacollector-apache-kafka_0.8.2.0-lib'
default['streamsets']['apache-kafka']['version'] = '1.0.0b2-1'
# Version of the sdc-cli command-line client.
default['streamsets']['sdc-cli']['version'] = '0.1.0'
# Pipeline settings.
# NOTE(review): clusterSlaveMemory is assigned again further down (384), so the 1024 set here does not survive.
default['streamsets']['pipeline']['clusterSlaveMemory'] = 1024
default['streamsets']['pipeline']['clusterSlaveJavaOpts'] = '-XX:PermSize=128M -XX:MaxPermSize=256M'
# Stored as the string 'false', not as a boolean.
default['streamsets']['pipeline']['clusterKerberos'] = 'false'
# Empty by default: no files to tail unless a wrapper or role fills this in.
default['streamsets']['pipeline']['configuration']['fileInfos'] = {}
# Data Collector server settings; an https_port of -1 is the shipped default here.
default['streamsets']['sdc']['http_port'] = 18630
default['streamsets']['sdc']['https_port'] = -1
default['streamsets']['sdc']['execution_mode'] = 'standalone'
# Extra environment variables for the SDC service; empty by default.
default['streamsets']['sdc']['environment'] = {}
# Content carried over from lithium_streamsets: site-specific pipeline values.
default['streamsets']['pipeline']['name'] = 'Firehose_File_Kafka'
# Second assignment of clusterSlaveMemory; this value (384) is the effective default.
default['streamsets']['pipeline']['clusterSlaveMemory'] = 384
# "${...}" is not Ruby interpolation, so the expression is stored literally in the attribute.
default['streamsets']['pipeline']['configuration']['topicExpression'] = "${record:attribute('tag')}"
# Comma-separated host:port list of Kafka brokers.
default['streamsets']['pipeline']['configuration']['metadataBrokerList'] = 'sjc1-kafka-1a-br1.sj.lithium.com:9092,sjc1-kafka-1a-br2.sj.lithium.com:9092,sjc1-kafka-1a-br3.sj.lithium.com:9092'
DEPENDENCIES
streamsets
path: .
metadata: true
GRAPH
build-essential (2.2.3)
java (1.35.0)
python (1.4.10)
build-essential (>= 0.0.0)
yum-epel (>= 0.0.0)
streamsets (0.1.3)
java (>= 0.0.0)
python (>= 0.0.0)
yum (3.6.3)
yum-epel (0.6.2)
yum (~> 3.2)

streamsets CHANGELOG

This file is used to list changes made in each version of the streamsets cookbook.

0.1.0

  • [your_name] - Initial release of streamsets

Check the Markdown Syntax Guide for help with Markdown.

The GitHub Flavored Markdown page describes the differences between Markdown on GitHub and standard Markdown.

# Put files/directories that should be ignored in this file when uploading
# or sharing to the community site.
# Lines that start with '# ' are comments.
# OS generated files #
######################
.DS_Store
Icon?
nohup.out
ehthumbs.db
Thumbs.db
# SASS #
########
.sass-cache
# EDITORS #
###########
\#*
.#*
*~
*.sw[a-z]
*.bak
REVISION
TAGS*
tmtags
*_flymake.*
*_flymake
*.tmproj
.project
.settings
mkmf.log
## COMPILED ##
##############
a.out
*.o
*.pyc
*.so
*.com
*.class
*.dll
*.exe
*/rdoc/
# Testing #
###########
.watchr
.rspec
spec/*
spec/fixtures/*
test/*
features/*
Guardfile
Procfile
# SCM #
#######
.git
*/.git
.gitignore
.gitmodules
.gitconfig
.gitattributes
.svn
*/.bzr/*
*/.hg/*
*/.svn/*
# Berkshelf #
#############
Berksfile
Berksfile.lock
cookbooks/*
tmp
# Cookbooks #
#############
CONTRIBUTING
CHANGELOG*
# Strainer #
############
Colanderfile
Strainerfile
.colander
.strainer
# Vagrant #
###########
.vagrant
Vagrantfile
# Travis #
##########
.travis.yml
# Pulled logic from Couchbase cookbook
# https://github.com/disney/couchbase/blob/441064912f4781b658b9c16c67e8b4a03f480f21/libraries/helper.rb
require 'chef/mixin/shell_out'
require "net/http"
# Readiness probes used while waiting for the StreamSets Data Collector (SDC)
# service to come up.
module StreamsetsHelpers
  # Class-level helpers; +shell_out!+ is provided by Chef::Mixin::ShellOut.
  class Server
    extend Chef::Mixin::ShellOut

    # Reports whether some socket listed by `netstat -lnt` is bound to +port+.
    #
    # The fourth whitespace-separated column of each output line is compared
    # against the ":<port>" suffix. Anchoring on the suffix (instead of an
    # unanchored regex on the bare number) keeps port 80 from matching 8080
    # or 18080, and keeps digits inside the address part from matching.
    #
    # @param port [Integer, String] TCP port to look for
    # @return [Boolean] true when at least one line matches
    # @raise [Mixlib::ShellOut::ShellCommandFailed] when netstat exits non-zero
    def self.service_listening?(port)
      netstat_command = "netstat -lnt"
      cmd = shell_out!(netstat_command)
      Chef::Log.debug("`#{netstat_command}` returned: \n\n #{cmd.stdout}")
      port_suffix = ":#{port}"
      cmd.stdout.each_line.any? do |line|
        # Header and short lines may have no fourth column; to_s makes nil safe.
        line.split[3].to_s.end_with?(port_suffix)
      end
    end

    # Issues a GET against +uri+ and reports whether the endpoint answered.
    #
    # Success, redirect and 403 responses all count as "responding": the
    # check is about the server being up, not about authorization.
    # Connection-level failures that are expected while the service is still
    # starting (refused, reset, premature EOF, timeouts) yield false instead
    # of raising, so callers can keep polling.
    #
    # @param uri [URI] endpoint to probe
    # @return [Boolean] true when the endpoint answered with 2xx, 3xx or 403
    def self.endpoint_responding?(uri)
      response = Net::HTTP.get_response(uri)
      if response.kind_of?(Net::HTTPSuccess) || response.kind_of?(Net::HTTPRedirection) || response.kind_of?(Net::HTTPForbidden)
        Chef::Log.debug("GET to #{uri} successful")
        true
      else
        Chef::Log.debug("GET to #{uri} returned #{response.code} / #{response.class}")
        false
      end
    rescue EOFError, Errno::ECONNREFUSED, Errno::ECONNRESET, Timeout::Error => e
      # Timeout::Error also covers Net::OpenTimeout and Net::ReadTimeout.
      Chef::Log.debug("GET to #{uri} failed with #{e.class}")
      false
    end
  end
end
# Cookbook metadata for the streamsets cookbook.
name 'streamsets'
maintainer 'StreamSets'
# NOTE(review): the value below looks like an e-mail obfuscation placeholder rather than a real address; restore the real maintainer address.
maintainer_email '[email protected]'
license 'All rights reserved'
description 'Installs/Configures StreamSets Data Collector'
# The long description is read from the README.md that sits next to this file.
long_description IO.read(File.join(File.dirname(__FILE__), 'README.md'))
version '0.1.3'
# Cookbooks this one depends on (no version constraints).
depends 'java'
depends 'python'
# Declares that this provider supports why-run mode.
#
# @return [Boolean] always true
def whyrun_supported?
  true
end
# Renders the pipeline definition file and, only when its content changed,
# (re)imports and starts the pipeline.
#
# Sequence:
#   1. Remember whether the pipeline file already existed.
#   2. Render pipeline.json.erb into that file right away via run_action.
#   3. If the file changed: stop the pipeline when it existed before, then
#      import and start it.
#   4. Report the template's updated state as this resource's updated state.
action :create do
  pipeline_file = get_filename
  # Must be captured BEFORE the template runs, otherwise the file always exists.
  # File.exist? replaces the deprecated File.exists? (removed in Ruby 3.2).
  pipeline_exists = ::File.exist?(pipeline_file)

  tpl = template pipeline_file do
    source 'pipeline.json.erb'
    owner 'sdc'
    group 'sdc'
    mode '0644'
    variables(
      :pipeline_name => node[cookbook_name]['pipeline']['name'],
      :clusterSlaveMemory => node[cookbook_name]['pipeline']['clusterSlaveMemory'],
      :clusterSlaveJavaOpts => node[cookbook_name]['pipeline']['clusterSlaveJavaOpts'],
      :clusterKerberos => node[cookbook_name]['pipeline']['clusterKerberos'],
      :config => node[cookbook_name]['pipeline']['configuration']
    )
    # Declared inert; it is triggered explicitly right below.
    action :nothing
  end
  tpl.run_action(:create)

  resource_updated = tpl.updated_by_last_action?
  if resource_updated
    # Only a pipeline that was present before this run gets stopped first.
    stop_pipeline if pipeline_exists
    import_pipeline
    start_pipeline
  end
  new_resource.updated_by_last_action(resource_updated)
end
# Stops the pipeline, then deletes its definition file.
action :remove do
  stop_pipeline
  file(get_filename) { action :delete }
end
private
# Resolves the path of the pipeline definition file.
#
# @return [String] the resource's config_file when one is given (non-nil and
#   non-empty), otherwise /opt/sdc/current/pipeline-<resource name>.json
def get_filename
  explicit_path = new_resource.config_file
  return explicit_path unless explicit_path.nil? || explicit_path.empty?

  ::File.join('/opt/sdc/current', "pipeline-#{new_resource.name}.json")
end
# Declares an execute resource that imports the rendered pipeline file into
# SDC through sdc-cli, run as the sdc user.
def import_pipeline
  # Resolved here, outside the resource block, exactly once.
  filename = get_filename
  resource_label = "import-pipeline #{new_resource.name}"

  execute(resource_label) do
    user 'sdc'
    command "sdc-cli --sdc-url http://localhost:#{node[cookbook_name]['sdc']['http_port']} --sdc-user admin --sdc-password admin --config-file #{::File.join('/tmp', new_resource.name)}.conf --auth-type form library import #{filename}"
  end
end
# Declares an execute resource that starts the pipeline named after this
# resource through sdc-cli, run as the sdc user.
def start_pipeline
  resource_label = "start-pipeline #{new_resource.name}"

  execute(resource_label) do
    user 'sdc'
    command "sdc-cli --sdc-url http://localhost:#{node[cookbook_name]['sdc']['http_port']} --sdc-user admin --sdc-password admin --config-file #{::File.join('/tmp', new_resource.name)}.conf pipeline start #{new_resource.name}"
  end
end
# Declares an execute resource that issues `pipeline stop` through sdc-cli,
# run as the sdc user. No pipeline name is passed on the command line.
def stop_pipeline
  resource_label = "stop-pipeline #{new_resource.name}"

  execute(resource_label) do
    user 'sdc'
    command "sdc-cli --sdc-url http://localhost:#{node[cookbook_name]['sdc']['http_port']} --sdc-user admin --sdc-password admin --config-file #{::File.join('/tmp', new_resource.name)}.conf pipeline stop"
  end
end
# Declares an execute resource that issues `pipeline reset-origin` for the
# pipeline named after this resource through sdc-cli, run as the sdc user.
def reset_pipeline
  resource_label = "reset-pipeline #{new_resource.name}"

  execute(resource_label) do
    user 'sdc'
    command "sdc-cli --sdc-url http://localhost:#{node[cookbook_name]['sdc']['http_port']} --sdc-user admin --sdc-password admin --config-file #{::File.join('/tmp', new_resource.name)}.conf pipeline reset-origin #{new_resource.name}"
  end
end

streamsets Cookbook

TODO: Enter the cookbook description here.

e.g. This cookbook makes your favorite breakfast sandwich.

Requirements

TODO: List your cookbook requirements. Be sure to include any requirements this cookbook has on platforms, libraries, other cookbooks, packages, operating systems, etc.

e.g.

packages

  • toaster - streamsets needs toaster to brown your bagel.

Attributes

TODO: List your cookbook attributes here.

e.g.

streamsets::default

Key Type Description Default
['streamsets']['bacon'] Boolean whether to include bacon true

Usage

streamsets::default

TODO: Write usage instructions for each cookbook.

e.g. Just include streamsets in your node's run_list:

{
  "name":"my_node",
  "run_list": [
    "recipe[streamsets]"
  ]
}

Contributing

TODO: (optional) If this is a public cookbook, detail the process for contributing. If this is a private cookbook, remove this section.

e.g.

  1. Fork the repository on GitHub
  2. Create a named feature branch (like add_component_x)
  3. Write your change
  4. Write tests for your change (if applicable)
  5. Run the tests, ensuring they all pass
  6. Submit a Pull Request using GitHub

License and Authors

Authors: TODO: List authors

#
# Cookbook Name:: streamsets
# Recipe:: default
#
# Copyright 2015, YOUR_COMPANY_NAME
#
# All rights reserved - Do Not Redistribute
#
include_recipe 'java'
# When the SDC environment does not define JAVA_HOME, fill it in from the
# java cookbook's java_home attribute.
unless node[cookbook_name]['sdc']['environment'].include?('JAVA_HOME')
  # Work on a copy; the merged attribute view itself is not modified in place.
  streamset_env = node[cookbook_name]['sdc']['environment'].dup
  streamset_env['JAVA_HOME'] = node['java']['java_home']
  # node.set is a deprecated alias of node.normal (removed in Chef 14);
  # node.normal writes at the same precedence level.
  node.normal[cookbook_name]['sdc']['environment'] = streamset_env
end
# Install the Data Collector package first, then its Apache Kafka stage
# library. Both come from local RPM files (local test setup for the
# individual cookbook).
{
  'streamsets' => '/tmp/streamsets-datacollector-1.0.0b2-1.rpm',
  'apache-kafka' => '/tmp/streamsets-datacollector-apache-kafka_0.8.2.0-lib-1.0.0b2-1.rpm'
}.each do |component, local_rpm|
  package node[cookbook_name][component]['package_name'] do
    #Local test for individual cookbook
    source local_rpm
    action :install
  end
end
# pip comes from the python cookbook.
include_recipe 'python::pip'

# The 'requests' Python package.
python_pip('requests') { action :install }

# The resource name is a single space; what actually gets installed is given
# through `options` (-e with the local sdc-cli source directory).
python_pip ' ' do
  options '-e /tmp/sdc-cli-0.1.0'
  action :install
  #version node[cookbook_name]['sdc-cli']['version'] if !node[cookbook_name]['sdc-cli']['version'].nil?
end
# Main SDC configuration file, readable and writable by the sdc user only.
# A change schedules a restart of the sdc service at the end of the run.
template '/etc/sdc/sdc.properties' do
  source 'sdc.properties.erb'
  owner 'sdc'
  group 'sdc'
  mode '0600'
  variables(
    :http_port => node[cookbook_name]['sdc']['http_port'],
    :https_port => node[cookbook_name]['sdc']['https_port'],
    :execution_mode => node[cookbook_name]['sdc']['execution_mode']
  )
  notifies :restart, "service[sdc]", :delayed
end
# Environment script for the SDC daemon, rendered from the configured
# environment hash. A change schedules a restart of the sdc service at the
# end of the run.
template '/opt/sdc/current/libexec/sdcd-env.sh' do
  source 'sdcd-env.sh.erb'
  owner 'root'
  group 'root'
  mode '0755'
  variables(:env => node[cookbook_name]['sdc']['environment'])
  notifies :restart, "service[sdc]", :delayed
end
# Pulled logic from Couchbase cookbook
# https://github.com/disney/couchbase/blob/441064912f4781b658b9c16c67e8b4a03f480f21/recipes/server.rb#L73
# Blocks the run until SDC is usable. It does nothing on its own and runs
# immediately whenever service[sdc] reports a change.
#
# Phase 1 polls every 5 seconds until the HTTP port is being listened on.
# Phase 2 polls every second until the /ping endpoint responds.
# Each phase raises once its retry counter exceeds max_retries.
ruby_block "sdc_block_until_operational" do
  block do
    max_retries = 10

    # Phase 1: wait for the listening socket.
    Chef::Log.info("Waiting until SDC is listening on port #{node[cookbook_name]['sdc']['http_port']}")
    retry_count = 0
    until StreamsetsHelpers::Server.service_listening?(node[cookbook_name]['sdc']['http_port'])
      retry_count += 1
      if retry_count > max_retries
        raise "SDC is not listening on port #{node[cookbook_name]['sdc']['http_port']}. Retried #{retry_count} times before aborting."
      end
      sleep 5
      Chef::Log.debug(".")
    end

    # Phase 2: wait for the HTTP API to answer; the counter starts over.
    Chef::Log.info("Waiting until the SDC admin API is responding")
    test_uri = URI("http://localhost:#{node[cookbook_name]['sdc']['http_port']}/ping")
    retry_count = 0
    until StreamsetsHelpers::Server.endpoint_responding?(test_uri)
      retry_count += 1
      if retry_count > max_retries
        raise "SDC API is not responding on port #{node[cookbook_name]['sdc']['http_port']}. Retried #{retry_count} times before aborting."
      end
      sleep 1
      Chef::Log.debug(".")
    end
  end
  action :nothing
  subscribes :run, 'service[sdc]', :immediately
end
# The SDC service: started now and enabled; the supported init commands are
# declared explicitly.
service 'sdc' do
  supports :status => true, :start => true, :stop => true, :restart => true
  action [:start, :enable]
end
# Create the pipeline whose name is configured in the pipeline attributes.
streamsets_config(node[cookbook_name]['pipeline']['name']) { action :create }
# Actions this resource supports; :create is the default.
actions(:create, :remove)
default_action(:create)

# Pipeline name; it is the resource's name attribute.
attribute(:name, :kind_of => String, :name_attribute => true)
# Optional explicit path of the pipeline definition file.
attribute(:config_file, :kind_of => String)
{
"pipelineConfig" : {
"schemaVersion" : 1,
"uuid" : "b80275cb-7eca-470f-940a-f2f631616d63",
"description" : "Reading log files and pushing them to Kafka",
"configuration" : [ {
"name" : "executionMode",
"value" : "STANDALONE"
}, {
"name" : "clusterSlaveMemory",
"value" : <%= @clusterSlaveMemory %>
}, {
"name" : "clusterSlaveJavaOpts",
"value" : "<%= @clusterSlaveJavaOpts %>"
}, {
"name" : "clusterKerberos",
"value" : <%= @clusterKerberos %>
}, {
"name" : "kerberosPrincipal",
"value" : ""
}, {
"name" : "kerberosKeytab",
"value" : ""
}, {
"name" : "clusterLauncherEnv",
"value" : [ ]
}, {
"name" : "deliveryGuarantee",
"value" : "AT_LEAST_ONCE"
}, {
"name" : "badRecordsHandling",
"value" : "streamsets-datacollector-basic-lib::com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget::1.0.0"
}, {
"name" : "constants",
"value" : [ ]
}, {
"name" : "memoryLimitExceeded",
"value" : "STOP_PIPELINE"
}, {
"name" : "memoryLimit",
"value" : "${jvm:maxMemoryMB() * 0.65}"
} ],
"uiInfo" : {
"previewConfig" : {
"previewSource" : "CONFIGURED_SOURCE",
"batchSize" : 10,
"writeToDestinations" : false,
"rememberMe" : false
}
},
"stages" : [ {
"instanceName" : "com_streamsets_pipeline_stage_origin_logtail_FileTailDSource1437417467994",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_origin_logtail_FileTailDSource",
"stageVersion" : "1.0.0",
"configuration" : [ {
"name" : "dataFormat",
"value" : "TEXT"
}, {
"name" : "multiLineMainPattern",
"value" : null
}, {
"name" : "charset",
"value" : "UTF-8"
}, {
"name" : "removeCtrlChars",
"value" : false
}, {
"name" : "maxLineLength",
"value" : 10240
}, {
"name" : "batchSize",
"value" : 1000
}, {
"name" : "maxWaitTimeSecs",
"value" : 5
}, {
"name" : "fileInfos",
"value" : [
<% @config['fileInfos'].each_with_index do |(tag, file_info), index| %>
{
"tag" : "<%= tag %>",
"fileFullPath" : "<%= file_info['fileFullPath'] %>",
"fileRollMode" : "<%= file_info['fileRollMode'] %>",
"patternForToken" : "<%= file_info['patternForToken'] %>",
"firstFile" : "<%= file_info['firstFile'] %>"
}<% if index < (@config['fileInfos'].length - 1) %>,<% end %>
<% end %>
]
}, {
"name" : "postProcessing",
"value" : "NONE"
}, {
"name" : "archiveDir",
"value" : null
}, {
"name" : "logMode",
"value" : "COMMON_LOG_FORMAT"
}, {
"name" : "retainOriginalLine",
"value" : false
}, {
"name" : "customLogFormat",
"value" : "%h %l %u %t \"%r\" %>s %b"
}, {
"name" : "regex",
"value" : "^(\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\d+)"
}, {
"name" : "fieldPathsToGroupName",
"value" : [ {
"fieldPath" : "/",
"group" : 1
} ]
}, {
"name" : "grokPatternDefinition",
"value" : null
}, {
"name" : "grokPattern",
"value" : "%{COMMONAPACHELOG}"
}, {
"name" : "enableLog4jCustomLogFormat",
"value" : false
}, {
"name" : "log4jCustomLogFormat",
"value" : "%r [%t] %-5p %c %x - %m%n"
}, {
"name" : "stageOnRecordError",
"value" : "TO_ERROR"
} ],
"uiInfo" : {
"label" : "JSONLogFileReader",
"description" : "",
"xPos" : 64,
"yPos" : 45,
"stageType" : "SOURCE",
"icon" : "/rest/v1/definitions/stages/icon?name=com_streamsets_pipeline_stage_origin_logtail_FileTailDSource&library=streamsets-datacollector-basic-lib&version=1.0.0",
"outputStreamLabels" : [ "Output", "Metadata" ],
"rawSource" : {
"configuration" : [ {
"name" : "fileName"
} ]
}
},
"inputLanes" : [ ],
"outputLanes" : [ "com_streamsets_pipeline_stage_origin_logtail_FileTailDSource1437417467994OutputLane14374174679940", "com_streamsets_pipeline_stage_origin_logtail_FileTailDSource1437417467994OutputLane14374174679941" ]
}, {
"instanceName" : "com_streamsets_pipeline_stage_destination_kafka_KafkaDTarget1437417477872",
"library" : "streamsets-datacollector-apache-kafka_0_8_2_0-lib",
"stageName" : "com_streamsets_pipeline_stage_destination_kafka_KafkaDTarget",
"stageVersion" : "1.0.0",
"configuration" : [ {
"name" : "metadataBrokerList",
"value" : "<%= @config['metadataBrokerList'] %>"
}, {
"name" : "runtimeTopicResolution",
"value" : true
}, {
"name" : "topicExpression",
"value" : "<%= @config['topicExpression'] %>"
}, {
"name" : "topicWhiteList",
"value" : "*"
}, {
"name" : "topic",
"value" : "stage"
}, {
"name" : "partitionStrategy",
"value" : "ROUND_ROBIN"
}, {
"name" : "partition",
"value" : "${0}"
}, {
"name" : "dataFormat",
"value" : "TEXT"
}, {
"name" : "charset",
"value" : "UTF-8"
}, {
"name" : "singleMessagePerBatch",
"value" : false
}, {
"name" : "csvFileFormat",
"value" : "CSV"
}, {
"name" : "kafkaProducerConfigs",
"value" : [ {
"key" : "queue.buffering.max.ms",
"value" : "5000"
}, {
"key" : "message.send.max.retries",
"value" : "90"
}, {
"key" : "retry.backoff.ms",
"value" : "10000"
} ]
}, {
"name" : "csvHeader",
"value" : "NO_HEADER"
}, {
"name" : "csvReplaceNewLines",
"value" : true
}, {
"name" : "jsonMode",
"value" : "MULTIPLE_OBJECTS"
}, {
"name" : "textFieldPath",
"value" : "/text"
}, {
"name" : "textEmptyLineIfNull",
"value" : false
}, {
"name" : "avroSchema",
"value" : null
}, {
"name" : "includeSchema",
"value" : true
}, {
"name" : "stageRequiredFields",
"value" : [ ]
}, {
"name" : "stageRecordPreconditions",
"value" : [ ]
}, {
"name" : "stageOnRecordError",
"value" : "TO_ERROR"
} ],
"uiInfo" : {
"label" : "STAGE_Kafka_Producer",
"description" : "",
"xPos" : 280,
"yPos" : 50,
"stageType" : "TARGET",
"icon" : "/rest/v1/definitions/stages/icon?name=com_streamsets_pipeline_stage_destination_kafka_KafkaDTarget&library=streamsets-datacollector-apache-kafka_0_8_2_0-lib&version=1.0.0"
},
"inputLanes" : [ "com_streamsets_pipeline_stage_origin_logtail_FileTailDSource1437417467994OutputLane14374174679940" ],
"outputLanes" : [ ]
}, {
"instanceName" : "com_streamsets_pipeline_stage_destination_devnull_NullDTarget1437417491170",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_NullDTarget",
"stageVersion" : "1.0.0",
"configuration" : [ ],
"uiInfo" : {
"label" : "Discard_Metadata",
"description" : "",
"xPos" : 281,
"yPos" : 182,
"stageType" : "TARGET",
"icon" : "/rest/v1/definitions/stages/icon?name=com_streamsets_pipeline_stage_destination_devnull_NullDTarget&library=streamsets-datacollector-basic-lib&version=1.0.0"
},
"inputLanes" : [ "com_streamsets_pipeline_stage_origin_logtail_FileTailDSource1437417467994OutputLane14374174679941" ],
"outputLanes" : [ ]
} ],
"errorStage" : {
"instanceName" : "com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget1437417723455",
"library" : "streamsets-datacollector-basic-lib",
"stageName" : "com_streamsets_pipeline_stage_destination_devnull_ToErrorNullDTarget",
"stageVersion" : "1.0.0",
"configuration" : [ ],
"uiInfo" : {
"label" : "Bad Records - Discard",
"description" : "",
"xPos" : 501,
"yPos" : 50,
"stageType" : "TARGET",
"icon" : "/assets/stage/defaultTarget.svg"
},
"inputLanes" : [ ],
"outputLanes" : [ ]
},
"info" : {
"name" : "<%= @pipeline_name %>",
"description" : "Reading log files and pushing them to Kafka",
"created" : 1437417389914,
"lastModified" : 1439403925446,
"creator" : "admin",
"lastModifier" : "admin",
"lastRev" : "0",
"uuid" : "b80275cb-7eca-470f-940a-f2f631616d63",
"valid" : true
},
"valid" : true,
"issues" : {
"issues" : [ ],
"issueCount" : 0,
"pipelineIssues" : [ ],
"stageIssues" : { }
},
"previewable" : true
},
"pipelineRules" : {
"metricsRuleDefinitions" : [ {
"id" : "badRecordsAlertID",
"alertText" : "High incidence of Bad Records",
"metricId" : "pipeline.batchErrorRecords.meter",
"metricType" : "METER",
"metricElement" : "METER_COUNT",
"condition" : "${value() > 100}",
"sendEmail" : false,
"enabled" : false,
"valid" : true
}, {
"id" : "stageErrorAlertID",
"alertText" : "High incidence of Error Messages",
"metricId" : "pipeline.batchErrorMessages.meter",
"metricType" : "METER",
"metricElement" : "METER_COUNT",
"condition" : "${value() > 100}",
"sendEmail" : false,
"enabled" : false,
"valid" : true
}, {
"id" : "idleGaugeID",
"alertText" : "Pipeline is Idle",
"metricId" : "RuntimeStatsGauge.gauge",
"metricType" : "GAUGE",
"metricElement" : "TIME_OF_LAST_RECEIVED_RECORD",
"condition" : "${time:now() - value() > 120000}",
"sendEmail" : false,
"enabled" : false,
"valid" : true
}, {
"id" : "batchTimeAlertID",
"alertText" : "Batch taking more time to process",
"metricId" : "RuntimeStatsGauge.gauge",
"metricType" : "GAUGE",
"metricElement" : "CURRENT_BATCH_AGE",
"condition" : "${value() > 200}",
"sendEmail" : false,
"enabled" : false,
"valid" : true
}, {
"id" : "memoryLimitAlertID",
"alertText" : "Memory limit for pipeline exceeded",
"metricId" : "pipeline.memoryConsumed.counter",
"metricType" : "COUNTER",
"metricElement" : "COUNTER_COUNT",
"condition" : "${value() > (jvm:maxMemoryMB() * 0.65)}",
"sendEmail" : false,
"enabled" : false,
"valid" : true
} ],
"dataRuleDefinitions" : [ ],
"emailIds" : [ ],
"uuid" : "d8733027-489d-4e83-bcd4-7f64aaab6b2e",
"ruleIssues" : [ ]
}
}
#
# (c) 2014 StreamSets, Inc. All rights reserved. May not
# be copied, modified, or distributed in whole or part without
# written consent of StreamSets, Inc.
#
# The base URL of the datacollector, used to create email alert messages.
# If not set http://<hostname>:<http.port> is used
# <hostname> is resolved using 'hostname -f'
#sdc.base.http.url=http://<hostname>:<port>
# HTTP configuration
# The port on which the data collector runs the SDC HTTP endpoint.
# If different than -1, the SDC will run on this port
# If 0, the SDC will pick up a random port
# If the https.port is different than -1 or 0 and http.port is different than -1 or 0, the HTTP endpoint
# will redirect to the HTTPS endpoint.
http.port=<%= @http_port %>
# HTTPS configuration
# The port on which the data collector runs the SDC HTTPS endpoint.
# If different than -1, the SDC will run over SSL on this port
# If 0, the SDC will pick up a random port
https.port=<%= @https_port %>
# Java keystore file, in the SDC 'etc/' configuration directory
https.keystore.path=keystore.jks
# Password for the keystore file,
# By default, the password is loaded from the 'sdc-keystore-password.txt'
# from the SDC 'etc/' configuration directory
https.keystore.password=@sdc-keystore-password.txt@
# The authentication for the HTTP endpoint of the data collector
# Valid values are: 'none', 'digest' and 'form'
#
http.authentication=form
# Runs the data collector within a Kerberos session which is propagated to all stages.
# This is useful for stages that require Kerberos authentication with the services they interact with
kerberos.client.enabled=false
# The kerberos principal to use for the Kerberos session.
# It should be a service principal. If the hostname part of the service principal is '_HOST' or '0.0.0.0',
# the hostname will be replaced with the actual complete hostname of the data collector as advertised by the
# unix command 'hostname -f'
kerberos.client.principal=sdc/_HOST@EXAMPLE.COM
# The location of the keytab file for the specified principal. If the path is relative, the keytab file will be
# looked under the data collector configuration directory
kerberos.client.keytab=sdc.keytab
# The realm used for authentication
# A file with the realm name and '.properties' extension must exist in the data collector configuration directory
# If this property is not set, the realm name is '<http.authentication>-realm'
#http.digest.realm=local-realm
preview.maxBatchSize=10
preview.maxBatches=10
production.maxBatchSize=1000
#This option determines the number of error records, per stage, that will be retained in memory when the pipeline is
#running. If set to zero, error records will not be retained in memory.
#If the specified limit is reached the oldest records will be discarded to make room for the newest one.
production.maxErrorRecordsPerStage=100
#This option determines the number of pipeline errors that will be retained in memory when the pipeline is
#running. If set to zero, pipeline errors will not be retained in memory.
#If the specified limit is reached the oldest error will be discarded to make room for the newest one.
production.maxPipelineErrors=100
# Max number of concurrent REST calls allowed for the /rest/v1/admin/log endpoint
max.logtail.concurrent.requests=5
# Max number of concurrent WebSocket calls allowed
max.webSockets.concurrent.requests=15
ui.local.help.base.url=/docs
ui.hosted.help.base.url=https://www.streamsets.com/documentation/1.0.0.b2/DCUserGuide/help
ui.refresh.interval.ms=2000
ui.jvmMetrics.refresh.interval.ms=4000
# SDC sends anonymous usage information using Google Analytics to StreamSets.
ui.enable.usage.data.collection=true
# If true SDC UI will use WebSocket to fetch pipeline status/metrics/alerts otherwise UI will poll every few seconds
# to get the Pipeline status/metrics/alerts.
ui.enable.webSocket=true
# Number of changes supported by undo/redo functionality.
# UI archives Pipeline Configuration/Rules in browser memory to support undo/redo functionality.
ui.undo.limit=10
# SMTP configuration to send alert emails
# All properties starting with 'mail.' are used to create the JavaMail session, supported protocols are 'smtp' & 'smtps'
mail.transport.protocol=smtp
mail.smtp.host=localhost
mail.smtp.port=25
mail.smtp.auth=false
mail.smtp.starttls.enable=false
mail.smtps.host=localhost
mail.smtps.port=465
mail.smtps.auth=false
# If 'mail.smtp.auth' or 'mail.smtps.auth' are set to true, these properties are used for the user/password credentials,
# @email-password.txt@ will load the value from the 'email-password.txt' file in the config directory (where this file is)
xmail.username=foo
xmail.password=@email-password.txt@
# FROM email address to use for the messages
xmail.from.address=sdc@$localhost
#Value of this property is used to create email message with a link to the console.
#By default the hostname is used.
#sdc.base.http.url=http://localhost:18630
#Indicates the location where runtime configuration properties can be found.
#Value 'embedded' implies that the runtime configuration properties are present in this file and are prefixed with
#'runtime.conf_'.
#A value other than 'embedded' is treated as the name of a properties file from which the runtime configuration
#properties must be picked up. Note that the properties should not be prefixed with 'runtime.conf_' in this case.
runtime.conf.location=embedded
# SDC Execution mode - cluster|standalone
sdc.execution.mode=<%= @execution_mode %>
#Observer related
#The size of the queue where the pipeline queues up data rule evaluation requests.
#Each request is for a stream and contains sampled records for all rules that apply to that lane.
observer.queue.size=100
#Sampled records which pass evaluation are cached for the user to view. This determines the size of the cache, and there is
#one cache per data rule.
observer.sampled.records.cache.size=100
#The time to wait before dropping a data rule evaluation request if the observer queue is full.
observer.queue.offer.max.wait.time.ms=1000
#!/bin/bash
#
# (c) 2014 StreamSets, Inc. All rights reserved. May not
# be copied, modified, or distributed in whole or part without
# written consent of StreamSets, Inc.
#
#
# Environment definitions for the data collector.
# This script is sourced when using the data collector initd scripts.
# Refer to the initd/README file for details.
#
# User that will run the data collector; it must exist in the system.
#
export SDC_USER=sdc
# Group of the user that will run the data collector; it must exist in the system.
#
export SDC_GROUP=sdc
# Directory where the data collector will store pipelines and their runtime information.
#
export SDC_DATA=/var/lib/sdc
# Directory where the data collector will read pipeline resource files from.
#
export SDC_RESOURCES=/var/lib/sdc-resources
# Directory where the data collector writes its logs.
#
export SDC_LOG=/var/log/sdc
# Directory where the data collector will read its configuration.
#
export SDC_CONF=/etc/sdc
# JVM options for the data collector process.
# Each assignment below PREPENDS its options to the current value of
# SDC_JAVA_OPTS, so anything already present in the environment ends up
# last in the final option string.
#
# Heap and permanent-generation sizing, server VM.
export SDC_JAVA_OPTS="-Xmx1024m -XX:PermSize=128M -XX:MaxPermSize=256M -server ${SDC_JAVA_OPTS}"
# Remote JMX on port 12345, with local-only restriction, authentication and
# SSL all switched off.
# NOTE(review): this leaves the JMX endpoint open to any host that can reach
# the port -- confirm this is intended outside of development environments.
export SDC_JAVA_OPTS="-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=12345 ${SDC_JAVA_OPTS}"
export SDC_JAVA_OPTS="-Dcom.sun.management.jmxremote.local.only=false -Dcom.sun.management.jmxremote.authenticate=false ${SDC_JAVA_OPTS}"
export SDC_JAVA_OPTS="-Dcom.sun.management.jmxremote.ssl=false ${SDC_JAVA_OPTS}"
# Enables/disables the JVM security manager.
#
export SDC_SECURITY_MANAGER_ENABLED=true
# Additional exports rendered by the template: one line per name/value pair
# of the @env hash passed to this template.
# NOTE(review): values are inserted verbatim inside double quotes, so a value
# containing '"', '$' or a backtick will be interpreted by the shell when this
# file is sourced -- verify the values supplied by the caller.
<% @env.each_pair do |name, value| %>
export <%= name %>="<%= value %>"
<% end %>
# Vagrant environment for the StreamSets cookbook: boots a single VirtualBox
# VM named "sdc" from the "chef/centos-6.5" box and provisions it with
# chef-solo using the streamsets::default recipe.
#
# Optional environment overrides (the defaults reproduce the values that were
# previously hard-coded, so existing setups behave exactly as before):
#   SDC_SYNC_HOST_DIR - host directory shared into the guest at /tmp/
#   SDC_HOST_PORT     - host port forwarded to guest port 18630
Vagrant.configure("2") do |config|
  # The original path is specific to one developer's machine; allow other
  # users to point the synced folder at a directory that exists on their host.
  sync_host_dir = ENV.fetch("SDC_SYNC_HOST_DIR", "/Users/vijay.polsani/Downloads/")
  # Base 10 is forced so a value such as "020000" is not read as octal.
  host_port = Integer(ENV.fetch("SDC_HOST_PORT", "20000"), 10)

  # Guest port 18630 is presumably the data collector's HTTP port -- confirm
  # against the http.port value rendered into sdc.properties.
  config.vm.network "forwarded_port", guest: 18630, host: host_port
  config.vm.synced_folder sync_host_dir, "/tmp/"

  # Optional plugins: only configured when they are installed.
  if Vagrant.has_plugin?("vagrant-cachier")
    config.cache.scope = :box
  end
  if Vagrant.has_plugin?("vagrant-berkshelf")
    config.berkshelf.enabled = true
  end

  config.vm.box = "chef/centos-6.5"

  config.vm.provider :virtualbox do |vb|
    # 1024 MB of guest memory.
    vb.customize ["modifyvm", :id, "--memory", "1024"]
  end

  config.vm.define "sdc", primary: true, autostart: true do |server|
    server.vm.provision :chef_solo do |chef|
      chef.log_level = :info
      chef.cookbooks_path = "../../cookbooks"
      # No attribute overrides: the recipe runs with the cookbook's defaults.
      chef.json = {
      }
      chef.run_list = [
        "recipe[streamsets::default]"
      ]
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment