try:
except (ConnectionError, ChunkedEncodingError, TooManyRedirects, NewConnectionError) as e:
logging.warn("Skip URL {} Reason: {}".format(url, e))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
HOME_LIB_DIR=${HOME}/lib | |
# symlink list | |
HADOOP_SYMLINK=${HOME_LIB_DIR}/hadoop | |
HBASE_SYMLINK=${HOME_LIB_DIR}/hbase | |
ZOOKEEPER_SYMLINK=${HOME_LIB_DIR}/zookeeper | |
HIVE_SYMLINK=${HOME_LIB_DIR}/hive | |
PIG_SYMLINK=${HOME_LIB_DIR}/pig |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import hashlib | |
startKey = 2 | |
endKey = 6 | |
inputNum = 1000 | |
testNum = 100000 | |
def check_bl(bloom, a): | |
aa = hashlib.md5(a).hexdigest()[startKey:endKey] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
import argparse | |
import sys | |
class MyArgParse(object): | |
def __init__(self): | |
pass | |
def sum(self): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
# logging stdout/stderr | |
set -x | |
exec >> /root/bootstrap-master-init.log 2>&1 | |
date | |
# Master node identifier | |
touch /root/kafka-kudu-demo_edge-node.flag |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script compacts daily CSV files into monthly Parquet files. | |
# The CSV files should be named in "YYYY-MM-DD.csv" format. | |
# | |
# このスクリプトは日付毎のcsvファイルを月毎のparquetファイルに変換します。 | |
# CSVファイルの名前は"YYYY-MM-DD.csv"の形式にしてください。 | |
# | |
import pandas as pd | |
import numpy as np | |
import pyarrow as pa |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import MeCab | |
from collections import Counter | |
from sklearn.feature_extraction.text import CountVectorizer | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
m = MeCab.Tagger("-Ochasen") | |
m2 = MeCab.Tagger("-Owakati") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
PROFILE=your_profile | |
INSTANCE_NAME=your_instance_name | |
SSH_KEYPATH=your_ssh_key_path | |
PUBLIC_HOSTNAME=`aws --profile ${PROFILE} ec2 describe-instances | jq -r ".Reservations[] | select(.Instances[0].Tags[].Value == \"${INSTANCE_NAME}\") | .Instances[0] | .PublicDnsName"` | |
echo "establish SOCKS proxy" | |
echo "ssh -i ${SSH_KEYPATH} -D 8157 -q ec2-user@${PUBLIC_HOSTNAME}" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
PROFILE=your_profile | |
INSTANCE_NAME=your_instance_name | |
aws --profile ${PROFILE} ec2 describe-instances | jq -r ".Reservations[] | select(.Instances[0].Tags[].Value == \"${INSTANCE_NAME}\") | .Instances[0] | {PrivateDnsName: .PrivateDnsName, PrivateIpAddress: .PrivateIpAddress, PublicDnsName: .PublicDnsName, PublicIpAddress: .PublicIpAddress}" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# dirs_compressor.py | |
# | |
# Usage: | |
# $ python dirs_compressor.py target_dir | |
# | |
import sys | |
import os | |
import os.path | |
import logging |