Last active
March 10, 2020 18:34
-
-
Save bilus/46fb04ea936f05e26525706d52a2b409 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?xml version="1.0" encoding="utf-8"?> | |
| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
| "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
| <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> | |
| <head> | |
| <!-- 2020-03-10 Tue 19:33 --> | |
| <meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> | |
| <meta name="viewport" content="width=device-width, initial-scale=1" /> | |
| <title>stitchingdata</title> | |
| <meta name="generator" content="Org mode" /> | |
| <meta name="author" content="Marcin Bilski" /> | |
| <style type="text/css"> | |
| <!--/*--><![CDATA[/*><!--*/ | |
| .title { text-align: center; | |
| margin-bottom: .2em; } | |
| .subtitle { text-align: center; | |
| font-size: medium; | |
| font-weight: bold; | |
| margin-top:0; } | |
| .todo { font-family: monospace; color: red; } | |
| .done { font-family: monospace; color: green; } | |
| .priority { font-family: monospace; color: orange; } | |
| .tag { background-color: #eee; font-family: monospace; | |
| padding: 2px; font-size: 80%; font-weight: normal; } | |
| .timestamp { color: #bebebe; } | |
| .timestamp-kwd { color: #5f9ea0; } | |
| .org-right { margin-left: auto; margin-right: 0px; text-align: right; } | |
| .org-left { margin-left: 0px; margin-right: auto; text-align: left; } | |
| .org-center { margin-left: auto; margin-right: auto; text-align: center; } | |
| .underline { text-decoration: underline; } | |
| #postamble p, #preamble p { font-size: 90%; margin: .2em; } | |
| p.verse { margin-left: 3%; } | |
| pre { | |
| border: 1px solid #ccc; | |
| box-shadow: 3px 3px 3px #eee; | |
| padding: 8pt; | |
| font-family: monospace; | |
| overflow: auto; | |
| margin: 1.2em; | |
| } | |
| pre.src { | |
| position: relative; | |
| overflow: visible; | |
| padding-top: 1.2em; | |
| } | |
| pre.src:before { | |
| display: none; | |
| position: absolute; | |
| background-color: white; | |
| top: -10px; | |
| right: 10px; | |
| padding: 3px; | |
| border: 1px solid black; | |
| } | |
| pre.src:hover:before { display: inline;} | |
| /* Languages per Org manual */ | |
| pre.src-asymptote:before { content: 'Asymptote'; } | |
| pre.src-awk:before { content: 'Awk'; } | |
| pre.src-C:before { content: 'C'; } | |
| /* pre.src-C++ doesn't work in CSS */ | |
| pre.src-clojure:before { content: 'Clojure'; } | |
| pre.src-css:before { content: 'CSS'; } | |
| pre.src-D:before { content: 'D'; } | |
| pre.src-ditaa:before { content: 'ditaa'; } | |
| pre.src-dot:before { content: 'Graphviz'; } | |
| pre.src-calc:before { content: 'Emacs Calc'; } | |
| pre.src-emacs-lisp:before { content: 'Emacs Lisp'; } | |
| pre.src-fortran:before { content: 'Fortran'; } | |
| pre.src-gnuplot:before { content: 'gnuplot'; } | |
| pre.src-haskell:before { content: 'Haskell'; } | |
| pre.src-hledger:before { content: 'hledger'; } | |
| pre.src-java:before { content: 'Java'; } | |
| pre.src-js:before { content: 'Javascript'; } | |
| pre.src-latex:before { content: 'LaTeX'; } | |
| pre.src-ledger:before { content: 'Ledger'; } | |
| pre.src-lisp:before { content: 'Lisp'; } | |
| pre.src-lilypond:before { content: 'Lilypond'; } | |
| pre.src-lua:before { content: 'Lua'; } | |
| pre.src-matlab:before { content: 'MATLAB'; } | |
| pre.src-mscgen:before { content: 'Mscgen'; } | |
| pre.src-ocaml:before { content: 'Objective Caml'; } | |
| pre.src-octave:before { content: 'Octave'; } | |
| pre.src-org:before { content: 'Org mode'; } | |
| pre.src-oz:before { content: 'OZ'; } | |
| pre.src-plantuml:before { content: 'Plantuml'; } | |
| pre.src-processing:before { content: 'Processing.js'; } | |
| pre.src-python:before { content: 'Python'; } | |
| pre.src-R:before { content: 'R'; } | |
| pre.src-ruby:before { content: 'Ruby'; } | |
| pre.src-sass:before { content: 'Sass'; } | |
| pre.src-scheme:before { content: 'Scheme'; } | |
| pre.src-screen:before { content: 'Gnu Screen'; } | |
| pre.src-sed:before { content: 'Sed'; } | |
| pre.src-sh:before { content: 'shell'; } | |
| pre.src-sql:before { content: 'SQL'; } | |
| pre.src-sqlite:before { content: 'SQLite'; } | |
| /* additional languages in org.el's org-babel-load-languages alist */ | |
| pre.src-forth:before { content: 'Forth'; } | |
| pre.src-io:before { content: 'IO'; } | |
| pre.src-J:before { content: 'J'; } | |
| pre.src-makefile:before { content: 'Makefile'; } | |
| pre.src-maxima:before { content: 'Maxima'; } | |
| pre.src-perl:before { content: 'Perl'; } | |
| pre.src-picolisp:before { content: 'Pico Lisp'; } | |
| pre.src-scala:before { content: 'Scala'; } | |
| pre.src-shell:before { content: 'Shell Script'; } | |
| pre.src-ebnf2ps:before { content: 'ebfn2ps'; } | |
| /* additional language identifiers per "defun org-babel-execute" | |
| in ob-*.el */ | |
| pre.src-cpp:before { content: 'C++'; } | |
| pre.src-abc:before { content: 'ABC'; } | |
| pre.src-coq:before { content: 'Coq'; } | |
| pre.src-groovy:before { content: 'Groovy'; } | |
| /* additional language identifiers from org-babel-shell-names in | |
| ob-shell.el: ob-shell is the only babel language using a lambda to put | |
| the execution function name together. */ | |
| pre.src-bash:before { content: 'bash'; } | |
| pre.src-csh:before { content: 'csh'; } | |
| pre.src-ash:before { content: 'ash'; } | |
| pre.src-dash:before { content: 'dash'; } | |
| pre.src-ksh:before { content: 'ksh'; } | |
| pre.src-mksh:before { content: 'mksh'; } | |
| pre.src-posh:before { content: 'posh'; } | |
| /* Additional Emacs modes also supported by the LaTeX listings package */ | |
| pre.src-ada:before { content: 'Ada'; } | |
| pre.src-asm:before { content: 'Assembler'; } | |
| pre.src-caml:before { content: 'Caml'; } | |
| pre.src-delphi:before { content: 'Delphi'; } | |
| pre.src-html:before { content: 'HTML'; } | |
| pre.src-idl:before { content: 'IDL'; } | |
| pre.src-mercury:before { content: 'Mercury'; } | |
| pre.src-metapost:before { content: 'MetaPost'; } | |
| pre.src-modula-2:before { content: 'Modula-2'; } | |
| pre.src-pascal:before { content: 'Pascal'; } | |
| pre.src-ps:before { content: 'PostScript'; } | |
| pre.src-prolog:before { content: 'Prolog'; } | |
| pre.src-simula:before { content: 'Simula'; } | |
| pre.src-tcl:before { content: 'tcl'; } | |
| pre.src-tex:before { content: 'TeX'; } | |
| pre.src-plain-tex:before { content: 'Plain TeX'; } | |
| pre.src-verilog:before { content: 'Verilog'; } | |
| pre.src-vhdl:before { content: 'VHDL'; } | |
| pre.src-xml:before { content: 'XML'; } | |
| pre.src-nxml:before { content: 'XML'; } | |
| /* add a generic configuration mode; LaTeX export needs an additional | |
| (add-to-list 'org-latex-listings-langs '(conf " ")) in .emacs */ | |
| pre.src-conf:before { content: 'Configuration File'; } | |
| table { border-collapse:collapse; } | |
| caption.t-above { caption-side: top; } | |
| caption.t-bottom { caption-side: bottom; } | |
| td, th { vertical-align:top; } | |
| th.org-right { text-align: center; } | |
| th.org-left { text-align: center; } | |
| th.org-center { text-align: center; } | |
| td.org-right { text-align: right; } | |
| td.org-left { text-align: left; } | |
| td.org-center { text-align: center; } | |
| dt { font-weight: bold; } | |
| .footpara { display: inline; } | |
| .footdef { margin-bottom: 1em; } | |
| .figure { padding: 1em; } | |
| .figure p { text-align: center; } | |
| .equation-container { | |
| display: table; | |
| text-align: center; | |
| width: 100%; | |
| } | |
| .equation { | |
| vertical-align: middle; | |
| } | |
| .equation-label { | |
| display: table-cell; | |
| text-align: right; | |
| vertical-align: middle; | |
| } | |
| .inlinetask { | |
| padding: 10px; | |
| border: 2px solid gray; | |
| margin: 10px; | |
| background: #ffffcc; | |
| } | |
| #org-div-home-and-up | |
| { text-align: right; font-size: 70%; white-space: nowrap; } | |
| textarea { overflow-x: auto; } | |
| .linenr { font-size: smaller } | |
| .code-highlighted { background-color: #ffff00; } | |
| .org-info-js_info-navigation { border-style: none; } | |
| #org-info-js_console-label | |
| { font-size: 10px; font-weight: bold; white-space: nowrap; } | |
| .org-info-js_search-highlight | |
| { background-color: #ffff00; color: #000000; font-weight: bold; } | |
| .org-svg { width: 90%; } | |
| /*]]>*/--> | |
| </style> | |
| <script type="text/javascript"> | |
| /* | |
| @licstart The following is the entire license notice for the | |
| JavaScript code in this tag. | |
| Copyright (C) 2012-2020 Free Software Foundation, Inc. | |
| The JavaScript code in this tag is free software: you can | |
| redistribute it and/or modify it under the terms of the GNU | |
| General Public License (GNU GPL) as published by the Free Software | |
| Foundation, either version 3 of the License, or (at your option) | |
| any later version. The code is distributed WITHOUT ANY WARRANTY; | |
| without even the implied warranty of MERCHANTABILITY or FITNESS | |
| FOR A PARTICULAR PURPOSE. See the GNU GPL for more details. | |
| As additional permission under GNU GPL version 3 section 7, you | |
| may distribute non-source (e.g., minimized or compacted) forms of | |
| that code without the copy of the GNU GPL normally required by | |
| section 4, provided you include this license notice and a URL | |
| through which recipients can access the Corresponding Source. | |
| @licend The above is the entire license notice | |
| for the JavaScript code in this tag. | |
| */ | |
| <!--/*--><![CDATA[/*><!--*/ | |
| function CodeHighlightOn(elem, id) | |
| { | |
| var target = document.getElementById(id); | |
| if(null != target) { | |
| elem.cacheClassElem = elem.className; | |
| elem.cacheClassTarget = target.className; | |
| target.className = "code-highlighted"; | |
| elem.className = "code-highlighted"; | |
| } | |
| } | |
| function CodeHighlightOff(elem, id) | |
| { | |
| var target = document.getElementById(id); | |
| if(elem.cacheClassElem) | |
| elem.className = elem.cacheClassElem; | |
| if(elem.cacheClassTarget) | |
| target.className = elem.cacheClassTarget; | |
| } | |
| /*]]>*///--> | |
| </script> | |
| </head> | |
| <body> | |
| <div id="content"> | |
| <h1 class="title">stitchingdata</h1> | |
| <p> | |
| How many distinct broadcast IDs have been saved to gold database. | |
| </p> | |
| <div class="org-src-container"> | |
| <pre class="src src-sql"><span style="color: #51afef;">SELECT</span> <span style="color: #c678dd;">count</span>(<span style="color: #51afef;">distinct</span> JSON_EXTRACT_SCALAR(message, "$.<span style="color: #51afef;">data</span>.broadcast_id")) <span style="color: #51afef;">FROM</span> `conrad-production-api.conradEvents.___delivery__broadcasts__broadcasts__update_committed` | |
| </pre> | |
| </div> | |
| <pre class="example"> | |
| 117364 | |
| </pre> | |
| <p> | |
| How many distinct broadcast IDs are in events flowing into broadcast materializer? | |
| </p> | |
| <div class="org-src-container"> | |
| <pre class="src src-sql"><span style="color: #51afef;">SELECT</span> <span style="color: #c678dd;">count</span>(<span style="color: #51afef;">distinct</span> message.<span style="color: #51afef;">data</span>.broadcast_id) <span style="color: #51afef;">FROM</span> `conrad-production-api.conradEvents.StaticDataEnrichedUpdates` | |
| </pre> | |
| </div> | |
| <pre class="example"> | |
| 2378 | |
| </pre> | |
| <p> | |
| Since when do we have the latter events? | |
| </p> | |
| <div class="org-src-container"> | |
| <pre class="src src-sql"><span style="color: #51afef;">SELECT</span> <span style="color: #c678dd;">MIN</span>(<span style="color: #51afef;">distinct</span> <span style="color: #ECBE7B;">timestamp</span>) <span style="color: #51afef;">FROM</span> `conrad-production-api.conradEvents.StaticDataEnrichedUpdates` | |
| </pre> | |
| </div> | |
| <pre class="example"> | |
| 2019-11-27 16:11:40.597233 UTC | |
| </pre> | |
| <p> | |
| And how many total? | |
| </p> | |
| <div class="org-src-container"> | |
| <pre class="src src-sql"><span style="color: #51afef;">SELECT</span> <span style="color: #c678dd;">count</span>(*) <span style="color: #51afef;">FROM</span> `conrad-production-api.conradEvents.StaticDataEnrichedUpdates` | |
| </pre> | |
| </div> | |
| <pre class="example"> | |
| 218540 | |
| </pre> | |
| <p> | |
| How many updates to gold db happened since that time? | |
| </p> | |
| <div class="org-src-container"> | |
| <pre class="src src-sql"><span style="color: #51afef;">SELECT</span> <span style="color: #c678dd;">count</span>(*) <span style="color: #51afef;">FROM</span> `conrad-production-api.conradEvents.___delivery__broadcasts__broadcasts__update_committed` | |
| <span style="color: #51afef;">WHERE</span> <span style="color: #ECBE7B;">timestamp</span> >= (<span style="color: #51afef;">SELECT</span> <span style="color: #c678dd;">min</span>(<span style="color: #ECBE7B;">timestamp</span>) <span style="color: #51afef;">from</span> `conrad-production-api.conradEvents.StaticDataEnrichedUpdates`) | |
| </pre> | |
| </div> | |
| <pre class="example"> | |
| 2581296 | |
| </pre> | |
| <p> | |
| Can we link them using correlation<sub>id</sub>? | |
| </p> | |
| <div class="org-src-container"> | |
| <pre class="src src-sql"><span style="color: #51afef;">SELECT</span> c.message, u.message.metadata.correlation_id <span style="color: #51afef;">FROM</span> `conrad-production-api.conradEvents.___delivery__broadcasts__broadcasts__update_committed` <span style="color: #51afef;">AS</span> c | |
| <span style="color: #51afef;">LEFT</span> <span style="color: #51afef;">JOIN</span> `conrad-production-api.conradEvents.StaticDataEnrichedUpdates` <span style="color: #51afef;">AS</span> u <span style="color: #51afef;">ON</span> JSON_EXTRACT_SCALAR(c.message, "$.metadata.correlation_id") = u.message.metadata.correlation_id | |
| <span style="color: #51afef;">WHERE</span> c.<span style="color: #ECBE7B;">timestamp</span> >= (<span style="color: #51afef;">SELECT</span> <span style="color: #c678dd;">min</span>(<span style="color: #ECBE7B;">timestamp</span>) <span style="color: #51afef;">from</span> `conrad-production-api.conradEvents.StaticDataEnrichedUpdates`) <span style="color: #51afef;">AND</span> u.message.metadata.correlation_id <span style="color: #51afef;">IS</span> <span style="color: #51afef;">NOT</span> <span style="color: #51afef;">NULL</span> <span style="color: #51afef;">LIMIT</span> <span style="color: #da8548; font-weight: bold;">1000</span> | |
| </pre> | |
| </div> | |
| <p> | |
| No, no results. | |
| </p> | |
| <p> | |
| Can we link them using update<sub>id</sub>? | |
| </p> | |
| <div class="org-src-container"> | |
| <pre class="src src-sql"><span style="color: #51afef;">SELECT</span> <span style="color: #c678dd;">count</span>(*) <span style="color: #51afef;">FROM</span> `conrad-production-api.conradEvents.___delivery__broadcasts__broadcasts__update_committed` <span style="color: #51afef;">AS</span> c | |
| <span style="color: #51afef;">LEFT</span> <span style="color: #51afef;">JOIN</span> `conrad-production-api.conradEvents.StaticDataEnrichedUpdates` <span style="color: #51afef;">AS</span> u <span style="color: #51afef;">ON</span> JSON_EXTRACT_SCALAR(c.message, "$.<span style="color: #51afef;">data</span>.update_id") = u.message.<span style="color: #51afef;">data</span>.update_id | |
| <span style="color: #51afef;">WHERE</span> c.<span style="color: #ECBE7B;">timestamp</span> >= (<span style="color: #51afef;">SELECT</span> <span style="color: #c678dd;">min</span>(<span style="color: #ECBE7B;">timestamp</span>) <span style="color: #51afef;">from</span> `conrad-production-api.conradEvents.StaticDataEnrichedUpdates`) <span style="color: #51afef;">AND</span> u.message.<span style="color: #51afef;">data</span>.update_id <span style="color: #51afef;">IS</span> <span style="color: #51afef;">NOT</span> <span style="color: #51afef;">NULL</span> | |
| </pre> | |
| </div> | |
| <p> | |
| Yes, 1739664 rows. | |
| </p> | |
| <p> | |
| It seems that the very small number of bids in enriched updates causes a huge | |
| amount of change to gold records. | |
| </p> | |
| <div class="org-src-container"> | |
| <pre class="src src-sql"><span style="color: #51afef;">SELECT</span> u.message.<span style="color: #51afef;">data</span>.broadcast_id, u.message.<span style="color: #51afef;">data</span>.<span style="color: #51afef;">full</span>.freq, u.message.<span style="color: #51afef;">data</span>.<span style="color: #51afef;">full</span>.content_id, JSON_EXTRACT_SCALAR(c.message, "$.<span style="color: #51afef;">data</span>.broadcast_id") <span style="color: #51afef;">as</span> cbid, JSON_EXTRACT_SCALAR(c.message, "$.<span style="color: #51afef;">data</span>.baseline_data.freq") <span style="color: #51afef;">as</span> cfreq, JSON_EXTRACT_SCALAR(c.message, "$.<span style="color: #51afef;">data</span>.baseline_data.broadcastData.contentId") <span style="color: #51afef;">as</span> ccid, u.<span style="color: #ECBE7B;">timestamp</span> t1, c.<span style="color: #ECBE7B;">timestamp</span> t2 <span style="color: #51afef;">FROM</span> `conrad-production-api.conradEvents.___delivery__broadcasts__broadcasts__update_committed` <span style="color: #51afef;">AS</span> c | |
| <span style="color: #51afef;">LEFT</span> <span style="color: #51afef;">JOIN</span> `conrad-production-api.conradEvents.StaticDataEnrichedUpdates` <span style="color: #51afef;">AS</span> u <span style="color: #51afef;">ON</span> JSON_EXTRACT_SCALAR(c.message, "$.<span style="color: #51afef;">data</span>.update_id") = u.message.<span style="color: #51afef;">data</span>.update_id | |
| <span style="color: #51afef;">WHERE</span> c.<span style="color: #ECBE7B;">timestamp</span> >= (<span style="color: #51afef;">SELECT</span> <span style="color: #c678dd;">min</span>(<span style="color: #ECBE7B;">timestamp</span>) <span style="color: #51afef;">from</span> `conrad-production-api.conradEvents.StaticDataEnrichedUpdates`) <span style="color: #51afef;">AND</span> u.message.<span style="color: #51afef;">data</span>.update_id <span style="color: #51afef;">IS</span> <span style="color: #51afef;">NOT</span> <span style="color: #51afef;">NULL</span> <span style="color: #51afef;">AND</span> u.message.<span style="color: #51afef;">data</span>.broadcast_id = <span style="color: #da8548; font-weight: bold;">194527</span> <span style="color: #51afef;">LIMIT</span> <span style="color: #da8548; font-weight: bold;">1000</span> | |
| </pre> | |
| </div> | |
| </div> | |
| <div id="postamble" class="status"> | |
| <p class="author">Author: Marcin Bilski</p> | |
| <p class="date">Created: 2020-03-10 Tue 19:33</p> | |
| </div> | |
| </body> | |
| </html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment