- By Edmond Lau
- Highly Recommended 👍
- http://www.theeffectiveengineer.com/
- They are the people who get things done. Effective Engineers produce results.
| #!/usr/bin/python | |
| import json | |
| import argparse | |
| def convertType(type): | |
| if type=="long": | |
| return "bigint" | |
| else: | |
| return type |
| #!/bin/bash | |
| set -e | |
| CONTENTS=$(tesseract -c language_model_penalty_non_dict_word=0.8 --tessdata-dir /usr/local/share/ "$1" stdout -l eng | xml esc) | |
| hex=$((cat <<EOF | |
| <?xml version="1.0" encoding="UTF-8"?> | |
| <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> | |
| <plist version="1.0"> |
| from imutils.object_detection import non_max_suppression | |
| import numpy as np | |
| import imutils | |
| import cv2 | |
| import requests | |
| import time | |
| import argparse | |
| import time | |
| ''' |
| from imutils.object_detection import non_max_suppression | |
| import numpy as np | |
| import imutils | |
| import cv2 | |
| import requests | |
| import time | |
| import argparse | |
| import time | |
| ''' |
| MiNiFi Config Version: 3 | |
| Flow Controller: | |
| name: GetFile | |
| comment: jsjlejgjkelkjgkalsjdgasetg | |
| Core Properties: | |
| flow controller graceful shutdown period: 10 sec | |
| flow service write delay interval: 500 ms | |
| administrative yield duration: 30 sec | |
| bored yield duration: 10 millis | |
| max concurrent threads: 1 |
| # Import packages | |
| from pyspark.ml import Pipeline | |
| from pyspark.ml.classification import RandomForestClassifier | |
| from pyspark.ml.feature import StringIndexer, VectorIndexer, OneHotEncoder, VectorAssembler, IndexToString | |
| from pyspark.ml.evaluation import MulticlassClassificationEvaluator | |
| from pyspark.sql.functions import * | |
| # Creating Spark SQL environment | |
| from pyspark.sql import SparkSession, HiveContext | |
| SparkContext.setSystemProperty("hive.metastore.uris", "thrift://nn1:9083") |
| <?xml version="1.0" encoding="UTF-8" standalone="yes"?> | |
| <template encoding-version="1.2"> | |
| <description></description> | |
| <groupId>8927f4c0-0160-1000-597a-ea764ccd81a7</groupId> | |
| <name>MDD</name> | |
| <snippet> | |
| <connections> | |
| <id>a2098494-cce9-3fa4-0000-000000000000</id> | |
| <parentGroupId>a8352767-434f-3321-0000-000000000000</parentGroupId> | |
| <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold> |
This example flow can be used to process files with the following requirements:
A group of files can only be processed when every file for that specific group is ready
Each filename has a groupId (e.g. 123_456) and a type name (e.g. ab/cd/ef)
Example set of files for group '123_456'
file_123_456.ex2
| hive> set hive.support.concurrency; | |
| hive.support.concurrency=true | |
| hive> set hive.enforce.bucketing; | |
| hive.enforce.bucketing=true | |
| hive> set hive.exec.dynamic.partition.mode; | |
| hive.exec.dynamic.partition.mode=nonstrict | |
| hive> set hive.txn.manager; | |
| hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager | |
| hive> set hive.compactor.initiator.on; | |
| hive.compactor.initiator.on=true |