Skip to content

Instantly share code, notes, and snippets.

@hivefans
hivefans / topkMR.java
Last active March 17, 2020 02:04 — forked from USCSU/topkMR.java
|-|{"files":{"topkMR.java":{"env":"plain"}},"tag":"bigdata"}
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.*;
import org.apache.hadoop.mapreduce.lib.output.*;
@hivefans
hivefans / ftp_download.py
Last active March 17, 2020 02:04 — forked from turbidsoul/ftp_download.py
ftp下载显示现在进度|-|{"files":{"ftp_download.py":{"env":"plain"}},"tag":"bigdata"}
import ftplib
import sys
from progressbar import ProgressBar
from progressbar.widgets import Percentage, FileTransferSpeed, ETA, Bar
if len(sys.argv) < 4:
print("请输入ftp地址,用户,密码和文件".decode("utf8"))
sys.exit(0)
@hivefans
hivefans / 115.py
Last active March 17, 2020 02:03 — forked from wusuopu/115.py
A script to automatically log in to 115|-|{"files":{"115.py":{"env":"plain"}},"tag":"bigdata"}
#!/usr/bin/env python
#-*- coding:utf-8 -*-
##
##
# Copyright (C)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation.
# 本程序是免费软件,基于GPL许可发布。
@hivefans
hivefans / JTarUtils.java
Last active March 17, 2020 02:03 — forked from johnkil/JTarUtils.java
Implementation of two versions of the utilities to decompress tar.gz archives (apache tar & jtar).|-|{"files":{"JTarUtils.java":{"env":"plain"},"TarUtils.java":{"env":"plain"}},"tag":"Uncategorized"}
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.zip.GZIPInputStream;
import org.xeustechnologies.jtar.TarEntry;
import org.xeustechnologies.jtar.TarInputStream;
@hivefans
hivefans / flume.conf
Last active March 17, 2020 02:03 — forked from ottomata/flume.conf
|-|{"files":{"flume.conf":{"env":"plain"}},"tag":"bigdata"}
webrequest.channels = file-channel
webrequest.sources = udp2log
webrequest.sinks = hdfs-sink
# Channel which buffers events on disk
webrequest.channels.file-channel.type = file
webrequest.channels.file-channel.checkpointDir = /var/lib/hadoop/data/e/flume/file-channel/checkpoint
webrequest.channels.file-channel.dataDirs = /var/lib/hadoop/data/e/flume/file-channel/data
webrequest.channels.file-channel.checkpointInterval = 1000
@hivefans
hivefans / HBaseNewAPI.scala
Last active March 17, 2020 02:03 — forked from wuchong/HBaseNewAPI.scala
Spark 下 操作 HBase 1.0.0 新版API|-|{"files":{"SparkOnHBase.scala":{"env":"plain"},"build.sbt":{"env":"plain"},"HBaseNewAPI.scala":{"env":"plain"}},"tag":"Uncategorized"}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HColumnDescriptor, HTableDescriptor, TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.client._
import org.apache.spark.SparkContext
import scala.collection.JavaConversions._
/**
* HBase 1.0.0 新版API, CRUD 的基本操作代码示例
**/
object HBaseNewAPI {
@hivefans
hivefans / flume-ng-agent.sh
Last active March 17, 2020 02:03 — forked from ashrithr/flume-ng-agent.sh
Custom Flume NG Agent INIT script for CentOS for running multiple agents on the same machine|-|{"files":{"flume-ng-agent.sh":{"env":"plain"},"usage.md":{"env":"plain"}},"tag":"Uncategorized"}
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
@hivefans
hivefans / flume-ng-configure
Last active March 17, 2020 02:03
|-|{"files":{"flume-ng-configure":{"env":"plain"}},"tag":"bigdata"}
emitter.sources = access avro1 thrift1 nc1
emitter.channels = c1
emitter.sinks = s1 s2 s3
emitter.sinkgroups = g1
emitter.sources.access.channels = c1
emitter.sources.access.type = exec
emitter.sources.access.command = tail -F -n 0 --pid `ps -o ppid= $$` /tmp/access.log | sed -e "s/^/host=`hostname --fqdn` category=access:/"
emitter.sources.access.shell = /bin/sh -c
@hivefans
hivefans / kafka-init.sh
Last active March 17, 2020 02:04
|-|{"files":{"kafka-init.sh":{"env":"plain"}},"tag":"bigdata"}
#!/bin/sh
#
# chkconfig: 345 99 01
# description: Kafka
#
# File : Kafka
#
# Description: Starts and stops the Kafka server
#
@hivefans
hivefans / logstash.index.json
Last active March 17, 2020 02:03 — forked from WPsites/logstash.index.json
Elasticsearch index template for logstash that contains additional NGINX fields|-|{"files":{"logstash.index.json":{"env":"plain"}},"tag":"bigdata"}
{
"template_logstash":{
"template" : "logstash*",
"settings" : {
"number_of_shards" : 5,
"index.cache.field.type" : "soft",
"index.refresh_interval" : "5s",
"index.store.compress.stored" : true,
"index.query.default_field" : "message",
"index.routing.allocation.total_shards_per_node" : 5