Skip to content

Instantly share code, notes, and snippets.

@clarksun
clarksun / UploadDirS3.py
Created October 25, 2018 06:35 — forked from feelinc/UploadDirS3.py
Upload folder contents to AWS S3
#!/usr/bin/python
import os
import sys
import boto3
# get the local (from) directory, the S3 bucket name, and the S3 (to)
# directory from the command-line
local_directory, bucket, destination = sys.argv[1:4]
@clarksun
clarksun / periodic.py
Created July 17, 2018 02:27 — forked from akaIDIOT/periodic.py
Call something periodically using asyncio
import asyncio
def call_periodic(interval, callback, *args, **kwargs):
# get loop as a kwarg or take the default one
loop = kwargs.get('loop') or asyncio.get_event_loop()
# record the loop's time when call_periodic was called
start = loop.time()
def run(handle):

Keybase proof

I hereby claim:

  • I am clarksun on github.
  • I am sunwei (https://keybase.io/sunwei) on keybase.
  • I have a public key whose fingerprint is F950 1F84 6804 11EC 6E3E 96EC 4EAA 26A7 9973 6081

To claim this, I am signing this object:

@clarksun
clarksun / sentiment.py
Last active March 6, 2018 08:20 — forked from bbengfort/sentiment.py
An end-to-end demonstration of a Scikit-Learn SVM classifier trained on the positive and negative movie reviews corpus in NLTK.
import os
import time
import string
import pickle
from operator import itemgetter
from nltk.corpus import stopwords as sw
from nltk.corpus import wordnet as wn
from nltk import wordpunct_tokenize
@clarksun
clarksun / db.py
Last active January 25, 2018 03:26
frontera DBW
# -*- coding: utf-8 -*-
"""
*batch*
/root/.virtualenvs/amazon/bin/python -m amazon.frontier.worker.db --config amazon.frontier.settings.prod.hbase.workersettings --no-incoming --no-scoring
*incoming*
/root/.virtualenvs/amazon/bin/python -m amazon.frontier.worker.db --config amazon.frontier.settings.prod.hbase.workersettings --no-batches --no-scoring --partition-id 0
/root/.virtualenvs/amazon/bin/python -m amazon.frontier.worker.db --config amazon.frontier.settings.prod.hbase.workersettings --no-batches --no-scoring --partition-id 1
/root/.virtualenvs/amazon/bin/python -m amazon.frontier.worker.db --config amazon.frontier.settings.prod.hbase.workersettings --no-batches --no-scoring --partition-id 2
/root/.virtualenvs/amazon/bin/python -m amazon.frontier.worker.db --config amazon.frontier.settings.prod.hbase.workersettings --no-batches --no-scoring --partition-id 3
@clarksun
clarksun / object_dict.py
Last active January 22, 2018 05:02
python dict用.来获取属性, 不用[]或者get
# tornado.util.ObjectDict
# Makes a dictionary behave like an object, with attribute-style access.
import typing
_ObjectDictBase = typing.Dict[str, typing.Any]
class ObjectDict(_ObjectDictBase):
"""Makes a dictionary behave like an object, with attribute-style access.
"""
def __getattr__(self, name):
# type: (str) -> Any
@clarksun
clarksun / http_tools.py
Last active January 17, 2018 05:26
临时代理获取临时cookie
# https://github.com/GuozhuHe/webspider/blob/master/webspider/utils/http_tools.py
def get_proxys(pages=4):
"""获取代理"""
proxy_list = []
url = 'http://www.xicidaili.com/wn/'
headers = generate_http_header()
headers.update(
{
'Referer': 'http://www.xicidaili.com/wn/',
'Host': 'www.xicidaili.com',
@clarksun
clarksun / cache.py
Created January 17, 2018 02:32
redis缓存函数结果
# coding=utf-8
# https://github.com/GuozhuHe/webspider/blob/master/webspider/utils/cache.py
import logging
import pickle
from functools import wraps
import redis
from common import config
@clarksun
clarksun / elapsed_time.py
Created January 11, 2018 06:16
计算运行时间
import time
class ET:
    """Stopwatch that reports the seconds elapsed since it was created.

    Create an instance to start timing, then call the instance to read the
    elapsed time::

        et = ET()
        ...            # work being timed
        seconds = et()

    Attributes:
        start_time: Wall-clock timestamp (``time.time()``) taken when the
            instance was created. Kept for callers that read it; it is not
            used to compute the elapsed time.
    """

    def __init__(self):
        # Wall-clock creation time, exposed as a public attribute.
        self.start_time = time.time()
        # Separate reference point on a monotonic, high-resolution clock.
        # time.time() may jump backwards or forwards when the system clock
        # is adjusted, which would corrupt an interval measurement.
        self._start_counter = time.perf_counter()

    def __call__(self):
        """Return the elapsed time in seconds (float) since creation."""
        return time.perf_counter() - self._start_counter
@clarksun
clarksun / pastebin.lua
Last active January 11, 2018 00:47 — forked from tdlm/pastebin.lua
PasteBin shortcut for Hammerspoon
-- View your api_dev_key here: http://pastebin.com/api
local PASTEBIN_API_DEVELOPER_KEY = ""
-- Generate your api_user_key here: http://pastebin.com/api/api_user_key.html
local PASTEBIN_API_USER_KEY = ""
-- This makes a paste public or private, public = 0, unlisted = 1, private = 2
local PASTEBIN_API_PASTE_PRIVATE = "1"
--[[