This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scrapy.contrib.exporter import BaseItemExporter | |
from scrapy import signals, log | |
from pipeline_base import StorePipeline | |
from os.path import join | |
class CustomItemExporter(BaseItemExporter): | |
def __init__(self, file, **kwargs): | |
self._configure(kwargs, dont_fail=True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# command example: | |
# $ spark-submit spark_log_extract.py \ | |
# --name test \ | |
# --notblankkeys dn,stm,ev_ac,pg_url \ | |
# --filterregex ".*(=ac_pl\`|=ac_dl\`).*" \ | |
# --usegzip \ | |
# /path/to/source \ | |
# /path/to/atom \ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.{SparkConf, SparkContext} | |
object LogExtract { | |
val keys = Array[String]("dn", "stm", "ev_ac", "v_title", "v_uri", "pg_url") | |
val notBlankKeys = Array[String]("dn", "stm", "ev_ac", "pg_url") | |
val filterRegex = ".*(ac_pl`|ac_dl`).*" | |
val useCompress = false | |
def process(line: String): String = { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
from TCLIService import TCLIService | |
from TCLIService.ttypes import TOpenSessionReq, TGetTablesReq, TFetchResultsReq, \ | |
TStatusCode, TGetResultSetMetadataReq, TGetColumnsReq, TType, TTypeId, \ | |
TExecuteStatementReq, TGetOperationStatusReq, TFetchOrientation, TCloseOperationReq, \ | |
TCloseSessionReq, TGetSchemasReq, TCancelOperationReq, THandleIdentifier, \ | |
TOperationHandle, TOperationState | |
from thrift import Thrift |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# __author__ = 'zhangguoze' | |
import sys | |
import jieba | |
from libshorttext.libshorttext.analyzer import * | |
from libshorttext.libshorttext.classifier import * | |
from libshorttext.libshorttext.converter import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import sys | |
import jieba | |
import numpy | |
from sklearn import metrics | |
from sklearn.feature_extraction.text import HashingVectorizer | |
from sklearn.naive_bayes import MultinomialNB | |