原題:Dynamo: Amazon’s Highly Available Key-value Store
原文: Amazon's Dynamo - All Things Distributed (PDF Version)
This article was translated by @ono_matope. Please contact me if you find any problems.
| use strict; | |
| use warnings; | |
| my @offset = (20, 20, -20, -20); | |
| while(<*.pdf>) | |
| { | |
| next if /-cropped/; | |
原題:Dynamo: Amazon’s Highly Available Key-value Store
原文: Amazon's Dynamo - All Things Distributed (PDF Version)
This article was translated by @ono_matope. Please contact me if you find any problems.
| require 'td' | |
| require 'td-client' | |
| require 'time' | |
| require 'date' | |
| # auth | |
| auth_key = File.read("client.cfg", :encoding => Encoding::UTF_8) | |
| cln = TreasureData::Client.new(auth_key) | |
| # argv | |
| date = ARGV[0] |
| -- Presto | |
| SELECT | |
| a.td_client_id as td_client_id | |
| ,a.td_os as td_os | |
| ,'Weekly_Over5_Accesses' as segment_name | |
| FROM | |
| ( | |
| SELECT | |
| td_client_id |
| --hive | |
| SELECT | |
| TD_SESSIONIZE(time, 86400, td_ip) as session_id | |
| , time | |
| , td_ip | |
| , td_path | |
| , td_client_id | |
| , td_title | |
| , td_browser | |
| , td_color |
| SELECT | |
| TD_TIME_FORMAT(TD_TIME_PARSE(session_start_time),'yyyy-MM-dd') as date | |
| ,count(distinct session_id) as session_cnt | |
| FROM | |
| session_summary | |
| GROUP BY | |
| TD_TIME_FORMAT(TD_TIME_PARSE(session_start_time),'yyyy-MM-dd') | |
| ORDER BY | |
| TD_TIME_FORMAT(TD_TIME_PARSE(session_start_time),'yyyy-MM-dd') |
| ito@ito:~/embulk$ embulk preview config/s3_fluent_load.yml | |
| 2015-12-02 00:28:28.930 -0800: Embulk v0.7.1 | |
| 2015-12-02 00:28:30.186 -0800 [INFO] (preview): Loaded plugin embulk-input-s3 (0.2.3) | |
| 2015-12-02 00:28:30.229 -0800 [INFO] (preview): Loaded plugin embulk-parser-fluent-s3-log (0.0.1) | |
| java.lang.IllegalArgumentException: Multiple entries with same key: material_id=org.embulk.spi.util.dynamic.StringColumnSetter@5a82bc58 and material_id=org.embulk.spi.util.dynamic.StringColumnSetter@4aab7195 | |
| at com.google.common.collect.ImmutableMap.checkNoConflict(com/google/common/collect/com/google/common/collect/com/google/common/collect/ImmutableMap.java:150) | |
| at com.google.common.collect.RegularImmutableMap.checkNoConflictInBucket(com/google/common/collect/com/google/common/collect/com/google/common/collect/RegularImmutableMap.java:104) | |
| at com.google.common.collect.RegularImmutableMap.<init>(com/google/common/collect/com/google/common/collect/com/google/common/collect/RegularImmutableMap.java:70) |
| in: | |
| type: s3 | |
| bucket: td-test-data | |
| path_prefix: test/example.log | |
| access_key_id: TTTTTTTTTTTTTTTTTT | |
| secret_access_key: XXXXXXXXXXXXXXXXXXXXXXXXXXXX | |
| parser: | |
| type: fluent-s3-log | |
| columns: | |
| - {name: uid, type: string} |
| in: | |
| type: postgresql | |
| host: 00.00.00.00 | |
| user: tank_user | |
| password: "XXXXXXXXXXXXXXXX" | |
| database: datatank | |
| query: | | |
| SELECT os,device,flag,count | |
| FROM device_master | |
| out: |
| in: | |
| type: s3 | |
| access_key_id: XXXXXXXXXX | |
| secret_access_key: YYYYYYYYYY | |
| bucket: sample_bucket | |
| path_prefix: path/to/sample_file | |
| parser: | |
| charset: UTF-8 | |
| newline: CRLF | |
| type: csv |