This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Note: This file is managed by Puppet. | |
| # Hue configuration file | |
| # =================================== | |
| # | |
| # For complete documentation about the contents of this file, run | |
| # $ <hue_root>/build/env/bin/hue config_help | |
| # | |
| # All .ini files under the current directory are treated equally. Their | |
| # contents are merged to form the Hue configuration, which can |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "rev_timestamp": "2017-06-06T20:18:06Z", | |
| "rev_sha1": "fy9n1sdjob23lhe51hspnnvqmicrgm8", | |
| "rev_parent_id": 137977799, | |
| "rev_minor_edit": false, | |
| "rev_len": 14267, | |
| "rev_id": 137981314, | |
| "rev_content_model": "wikitext", | |
| "rev_content_format": "wikitext", | |
| "comment": "", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| set session sql_log_bin=0; | |
| -- Dropping EventLogging tables with no events more recent than 90 days ago (since )... | |
| DROP TABLE `BannerImpression_5329872`; | |
| DROP TABLE `CentralNoticeBannerHistory_13447710`; | |
| DROP TABLE `ChangesListHighlights_16449602`; | |
| DROP TABLE `ChangesListHighlights_16484288`; | |
| DROP TABLE `CommandInvocation_15237653`; | |
| DROP TABLE `CompletionSuggestions_13424343`; | |
| DROP TABLE `CompletionSuggestions_13630018`; | |
| DROP TABLE `ContentTranslationCTA_11616099`; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // // TODO: instead of the following, what if we took the original outputDf, and | |
| // // added null cast as type fields not present in outputDf | |
| // // to the outputDf's schema. Then we shouldn't need to re-read the data with the | |
| // val table = hiveContext.table(tableName) | |
| // | |
| // // This is kinda working? Maybe we need | |
| //// val emptyTableDf = table.where("1=0") | |
| //// val finalDf = emptyTableDf.unionAll(outputDf) | |
| // | |
| // // make sure the output df has table's fields |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| object CoolStuff { | |
| implicit class StringImplicits(s: String) { | |
| def withCool(s2: String): String = s + " COOL " + s2 | |
| } | |
| implicit class SeqStringImplicits(strings: Seq[String]) { | |
| def joinEm(sep: String = " | "): String = { | |
| // error: not found: value withCool | |
| strings.map(withCool).mkString(sep) | |
| // But this will work: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| whitelist='(DiacriticsPoll|DiacriticsVisibility|Echo|EchoInteraction|Echo|EchoMail|Echo|Edit|EditorActivation|FlowReplies|GatherClicks|GatherFlags|GettingStartedNavbarNoArticle|GettingStartedOnRedirect|GettingStartedRedirectImpression|GuidedTourButtonClick|GuidedTour|GuidedTourExited|GuidedTourExternalLinkActivation|GuidedTourGuiderHidden|GuidedTourGuiderImpression|GuidedTour|GuidedTourInternalLinkActivation|GuidedTour|MediaWikiInstallPingback|MobileOptionsTracking|MobileWebBrowse|MobileWebClickTracking|MobileWebCta|MobileWebDiffClickTracking|MobileWebEditing|MobileWebInfobox|MobileWebLanguageSwitcher|MobileWebMainMenuClickTracking|MobileWebSearch|MobileWebSectionUsage|MobileWebUIClickTracking|MobileWebUploads|MobileWebWatching|MobileWebWatchlistClickTracking|MobileWebWikiGrok|MobileWebWikiGrokError|MobileWebWikiGrok|MobileWebWikiGrokResponse|MobileWebWikiGrok|MobileWikiAppAppearanceSettings|MobileWikiAppArticleSuggestions|MobileWikiAppCreateAccount|MobileWikiAppDailyStats|MobileWikiAppEdit|MobileWikiAppInsta |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from sseclient import SSEClient | |
| import json | |
| url = 'https://stream.wikimedia.org/v2/stream/recentchange' | |
| for event in SSEClient(url): | |
| if event.event == 'message' and event.data: | |
| change = json.loads(event.data) | |
| print('%(user)s edited %(title)s' % change) | |
| elif event.event == 'error': | |
| print('--- Encountered error', event.data) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Where to output varnish log lines: | |
| # kafka - (default) send to kafka broker | |
| # stdout - just print to stdout (behave like varnishncsa) | |
| # null - (test) collect all tags specified by format but don't output anything | |
| output = stdout | |
| # Log formatter | |
| format.type = json | |
| format = %{fake_tag0@hostname?fake-hostname-change-me-if-you-want}x %{@sequence!num?0}n %{end:%FT%T@dt}t %{Varnish:time_firstbyte@time_firstbyte!num?0.0}x %{X-Client-IP@ip}o %{X-Cache-Status@cache_status}o %{@http_status}s %{@response_size!num?0}b %{@http_method}m %{Host@uri_host}i %{@uri_path}U %{@uri_query}q %{Content-Type@content_type}o %{Referer@referer}i %{X-Forwarded-For@x_forwarded_for}i %{User-Agent@user_agent}i %{Accept-Language@accept_language}i %{X-Analytics@x_analytics}o %{Range@range}i %{X-Cache@x_cache}o |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package org.wikimedia.analytics.refinery.job | |
| import com.github.nscala_time.time.Imports.{LocalDate, Period} | |
| import com.twitter.algebird.{QTree, QTreeSemigroup} | |
| import org.apache.hadoop.fs.Path | |
| import org.apache.spark.rdd.RDD | |
| import org.apache.spark.sql.hive.HiveContext | |
| import org.apache.spark.sql.{DataFrame, SQLContext} | |
| import org.apache.spark.{SparkConf, SparkContext} | |
| import scopt.OptionParser |