Created
February 16, 2022 19:27
-
-
Save camsaul/1e5a56e1c58f30c972737af56077be18 to your computer and use it in GitHub Desktop.
Tree Shaking the Hive JDBC Driver
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns metabase.driver.sparksql-connection-test
  "Smoke test: checks that the Hive JDBC connection class can be loaded and that a trivial query can
  be run over a Hive connection."
  (:require [clojure.java.jdbc :as jdbc]
            [clojure.test :refer :all]))
(deftest connection-test
  ;; 1. The Hive connection class must be loadable. `Class/forName` throws if it is not, so reaching
  ;;    the assertion at all already means the class was found.
  (println (pr-str (Class/forName "org.apache.hive.jdbc.HiveConnection")))
  (is (some? (Class/forName "org.apache.hive.jdbc.HiveConnection")))
  ;; 2. The namespace wrapping the connection must load; `require` returns nil on success and throws
  ;;    otherwise.
  (println (pr-str (require 'metabase.driver.hive-like.fixed-hive-connection)))
  (is (nil? (require 'metabase.driver.hive-like.fixed-hive-connection)))
  ;; 3. Open a real connection and run a trivial query through it.
  (println "Check connection")
  ;; The constructor fn is looked up with `resolve` at run time because its namespace is only loaded
  ;; by the `require` calls above, not by this namespace's `ns` form.
  (with-open [^java.sql.Connection conn ((resolve 'metabase.driver.hive-like.fixed-hive-connection/fixed-hive-connection)
                                         "jdbc:hive2://localhost:10000/"
                                         (java.util.Properties.))]
    (is (= [{:one 1}]
           (jdbc/query {:connection conn} "SELECT 1 AS one;")))))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns test-driver
  (:require [clojure.pprint]
            [metabuild-common.core :as u]))
(def deps
  "Candidate dependencies, as `group/artifact` symbols. `check-if-needed` passes each one in turn as
  the sole `:exclusions` entry for `org.apache.hive/hive-jdbc` to find out whether the connection
  test still passes without it."
  '[aopalliance/aopalliance
    ch.qos.logback/logback-classic
    ch.qos.logback/logback-core
    co.cask.tephra/tephra-api
    co.cask.tephra/tephra-core
    co.cask.tephra/tephra-hbase-compat-1.0
    com.carrotsearch/hppc
    com.cedarsoftware/java-util
    com.cedarsoftware/json-io
    com.fasterxml.jackson.core/jackson-annotations
    com.fasterxml.jackson.core/jackson-core
    com.fasterxml.jackson.core/jackson-databind
    com.fasterxml.jackson.jaxrs/jackson-jaxrs-base
    com.fasterxml.jackson.jaxrs/jackson-jaxrs-json-provider
    com.fasterxml.jackson.module/jackson-module-jaxb-annotations
    com.fasterxml.woodstox/woodstox-core
    com.github.joshelser/dropwizard-metrics-hadoop-metrics2-reporter
    com.github.stephenc.findbugs/findbugs-annotations
    com.github.stephenc.jcip/jcip-annotations
    com.google.code.findbugs/jsr305
    com.google.code.gson/gson
    com.google.guava/guava
    com.google.inject/guice
    com.google.inject.extensions/guice-assistedinject
    com.google.inject.extensions/guice-servlet
    com.google.protobuf/protobuf-java
    com.google.re2j/re2j
    com.jcraft/jsch
    com.jolbox/bonecp
    com.lmax/disruptor
    com.microsoft.sqlserver/mssql-jdbc
    com.nimbusds/nimbus-jose-jwt
    com.sun.jersey/jersey-client
    com.sun.jersey/jersey-core
    com.sun.jersey/jersey-json
    com.sun.jersey/jersey-server
    com.sun.jersey/jersey-servlet
    com.sun.jersey.contribs/jersey-guice
    com.sun.xml.bind/jaxb-impl
    com.tdunning/json
    com.thoughtworks.paranamer/paranamer
    com.vlkan/flatbuffers
    com.zaxxer/HikariCP
    com.zaxxer/HikariCP-java7
    commons-beanutils/commons-beanutils
    commons-cli/commons-cli
    commons-codec/commons-codec
    commons-collections/commons-collections
    commons-daemon/commons-daemon
    commons-dbcp/commons-dbcp
    commons-io/commons-io
    commons-lang/commons-lang
    commons-logging/commons-logging
    commons-net/commons-net
    commons-pool/commons-pool
    de.ruedigermoeller/fst
    dnsjava/dnsjava
    io.airlift/aircompressor
    io.dropwizard.metrics/metrics-core
    io.dropwizard.metrics/metrics-json
    io.dropwizard.metrics/metrics-jvm
    io.netty/netty
    io.netty/netty-all
    io.netty/netty-buffer
    io.netty/netty-common
    it.unimi.dsi/fastutil
    javax.annotation/javax.annotation-api
    javax.inject/javax.inject
    javax.jdo/jdo-api
    javax.servlet/javax.servlet-api
    javax.servlet.jsp/javax.servlet.jsp-api
    javax.servlet.jsp/jsp-api
    javax.transaction/jta
    javax.transaction/transaction-api
    javax.validation/validation-api
    javax.ws.rs/javax.ws.rs-api
    javax.ws.rs/jsr311-api
    javax.xml.bind/jaxb-api
    javolution/javolution
    jline/jline
    joda-time/joda-time
    junit/junit
    log4j/log4j
    net.minidev/accessors-smart
    net.minidev/json-smart
    net.sf.jpam/jpam
    net.sf.opencsv/opencsv
    org.antlr/antlr-runtime
    org.apache.ant/ant
    org.apache.ant/ant-launcher
    org.apache.arrow/arrow-format
    org.apache.arrow/arrow-memory
    org.apache.arrow/arrow-vector
    org.apache.avro/avro
    org.apache.commons/commons-collections4
    org.apache.commons/commons-compress
    org.apache.commons/commons-configuration2
    org.apache.commons/commons-crypto
    org.apache.commons/commons-lang3
    org.apache.commons/commons-math3
    org.apache.curator/apache-curator
    org.apache.curator/curator-client
    org.apache.curator/curator-framework
    org.apache.curator/curator-recipes
    org.apache.derby/derby
    org.apache.geronimo.specs/geronimo-jcache_1.0_spec
    org.apache.hadoop/hadoop-annotations
    org.apache.hadoop/hadoop-auth
    org.apache.hadoop/hadoop-client
    org.apache.hadoop/hadoop-common
    org.apache.hadoop/hadoop-distcp
    org.apache.hadoop/hadoop-hdfs
    org.apache.hadoop/hadoop-mapreduce-client-app
    org.apache.hadoop/hadoop-mapreduce-client-common
    org.apache.hadoop/hadoop-mapreduce-client-core
    org.apache.hadoop/hadoop-mapreduce-client-jobclient
    org.apache.hadoop/hadoop-mapreduce-client-shuffle
    org.apache.hadoop/hadoop-yarn-api
    org.apache.hadoop/hadoop-yarn-client
    org.apache.hadoop/hadoop-yarn-common
    org.apache.hadoop/hadoop-yarn-registry
    org.apache.hadoop/hadoop-yarn-server-applicationhistoryservice
    org.apache.hadoop/hadoop-yarn-server-common
    org.apache.hadoop/hadoop-yarn-server-resourcemanager
    org.apache.hadoop/hadoop-yarn-server-web-proxy
    org.apache.hbase/hbase-client
    org.apache.hbase/hbase-common
    org.apache.hbase/hbase-common$tests
    org.apache.hbase/hbase-hadoop-compat
    org.apache.hbase/hbase-hadoop2-compat
    org.apache.hbase/hbase-http
    org.apache.hbase/hbase-mapreduce
    org.apache.hbase/hbase-metrics
    org.apache.hbase/hbase-metrics-api
    org.apache.hbase/hbase-prefix-tree
    org.apache.hbase/hbase-procedure
    org.apache.hbase/hbase-protocol
    org.apache.hbase/hbase-protocol-shaded
    org.apache.hbase/hbase-replication
    org.apache.hbase/hbase-server
    org.apache.hbase.thirdparty/hbase-shaded-miscellaneous
    org.apache.hbase.thirdparty/hbase-shaded-netty
    org.apache.hbase.thirdparty/hbase-shaded-protobuf
    org.apache.hive/hive-classification
    org.apache.hive/hive-common
    org.apache.hive/hive-jdbc
    org.apache.hive/hive-llap-client
    org.apache.hive/hive-llap-common
    org.apache.hive/hive-llap-common$tests
    org.apache.hive/hive-llap-server
    org.apache.hive/hive-llap-tez
    org.apache.hive/hive-metastore
    org.apache.hive/hive-serde
    org.apache.hive/hive-service
    org.apache.hive/hive-service-rpc
    org.apache.hive/hive-shims
    org.apache.hive/hive-standalone-metastore
    org.apache.hive/hive-storage-api
    org.apache.hive/hive-upgrade-acid
    org.apache.hive.shims/hive-shims-0.23
    org.apache.hive.shims/hive-shims-common
    org.apache.hive.shims/hive-shims-scheduler
    org.apache.htrace/htrace-core
    org.apache.htrace/htrace-core4
    org.apache.httpcomponents/httpclient
    org.apache.httpcomponents/httpcore
    org.apache.kerby/kerb-admin
    org.apache.kerby/kerb-client
    org.apache.kerby/kerb-common
    org.apache.kerby/kerb-core
    org.apache.kerby/kerb-crypto
    org.apache.kerby/kerb-identity
    org.apache.kerby/kerb-server
    org.apache.kerby/kerb-simplekdc
    org.apache.kerby/kerb-util
    org.apache.kerby/kerby-asn1
    org.apache.kerby/kerby-config
    org.apache.kerby/kerby-pkix
    org.apache.kerby/kerby-util
    org.apache.kerby/kerby-xdr
    org.apache.kerby/token-provider
    org.apache.logging.log4j/log4j-1.2-api
    org.apache.logging.log4j/log4j-api
    org.apache.logging.log4j/log4j-core
    org.apache.logging.log4j/log4j-slf4j-impl
    org.apache.logging.log4j/log4j-web
    org.apache.orc/orc-core
    org.apache.orc/orc-shims
    org.apache.parquet/parquet-hadoop-bundle
    org.apache.thrift/libfb303
    org.apache.thrift/libthrift
    org.apache.twill/twill-api
    org.apache.twill/twill-common
    org.apache.twill/twill-core
    org.apache.twill/twill-discovery-api
    org.apache.twill/twill-discovery-core
    org.apache.twill/twill-zookeeper
    org.apache.yetus/audience-annotations
    org.apache.zookeeper/zookeeper
    org.clojure/clojure
    org.clojure/core.specs.alpha
    org.clojure/spec.alpha
    org.codehaus.jackson/jackson-core-asl
    org.codehaus.jackson/jackson-jaxrs
    org.codehaus.jackson/jackson-mapper-asl
    org.codehaus.jackson/jackson-xc
    org.codehaus.jettison/jettison
    org.codehaus.woodstox/stax2-api
    org.datanucleus/datanucleus-api-jdo
    org.datanucleus/datanucleus-core
    org.datanucleus/datanucleus-rdbms
    org.datanucleus/javax.jdo
    org.ehcache/ehcache
    org.fusesource.leveldbjni/leveldbjni-all
    org.glassfish/javax.el
    org.glassfish.hk2/hk2-api
    org.glassfish.hk2/hk2-locator
    org.glassfish.hk2/hk2-utils
    org.glassfish.hk2/osgi-resource-locator
    org.glassfish.hk2.external/aopalliance-repackaged
    org.glassfish.hk2.external/javax.inject
    org.glassfish.jersey.bundles.repackaged/jersey-guava
    org.glassfish.jersey.containers/jersey-container-servlet-core
    org.glassfish.jersey.core/jersey-client
    org.glassfish.jersey.core/jersey-common
    org.glassfish.jersey.core/jersey-server
    org.glassfish.jersey.media/jersey-media-jaxb
    org.glassfish.web/javax.servlet.jsp
    org.hamcrest/hamcrest-core
    org.jamon/jamon-runtime
    org.javassist/javassist
    org.jruby.jcodings/jcodings
    org.jruby.joni/joni
    org.mortbay.jetty/jetty
    org.mortbay.jetty/jetty-util
    org.ow2.asm/asm
    org.slf4j/slf4j-api
    org.slf4j/slf4j-log4j12
    org.xerial.snappy/snappy-java
    sqlline/sqlline
    xerces/xercesImpl
    xml-apis/xml-apis
    xmlenc/xmlenc])
;; Accumulates the outcome of every check as a map of dep symbol -> boolean
;; (true = required, false = not needed). Starts as nil; `assoc` on nil yields a map.
;; `defonce` keeps results gathered so far when this file is re-evaluated.
(defonce results (atom nil))
(defn- check-if-needed
  "Run `metabase.driver.sparksql-connection-test/connection-test` in a fresh `clojure` process with
  `dep` excluded from `org.apache.hive/hive-jdbc`, and record the outcome in `results`:

  * the command completes without throwing -> `dep` is recorded as `false` (not needed)
  * anything is thrown                     -> `dep` is recorded as `true` (required)

  `dep` is a `group/artifact` symbol."
  [dep]
  (u/step (format "Check if %s is needed" dep)
    (try
      ;; NOTE(review): the Metabase checkout location is hard-coded to the author's machine.
      (u/sh {:dir "/home/cam/metabase", :quiet? true}
            "clojure"
            "-Sforce"
            "-Sdeps"
            ;; The extra deps map is passed to the CLI as an EDN string. Namespace-map shorthand is
            ;; switched off while printing so the maps are emitted with plain `{...}` syntax.
            (binding [*print-namespace-maps* false]
              (pr-str {:aliases
                       {:spark
                        {:extra-deps  {'org.apache.hive/hive-jdbc
                                       {:mvn/version "3.1.2"
                                        :exclusions  [dep]}}
                         :extra-paths ["modules/drivers/sparksql/src"
                                       "modules/drivers/sparksql/test"]
                         :java-opts   ["-Ddrivers=sparksql"]
                         :exec-args   {:only 'metabase.driver.sparksql-connection-test/connection-test}}}}))
            "-X:dev:spark:test")
      ;; `locking` serializes output from the worker threads so messages do not interleave.
      (locking u/announce (u/announce "%s is not needed." dep))
      (swap! results assoc dep false)
      ;; Any failure at all (not just a test failure) is interpreted as "this dep is required".
      (catch Throwable _
        (locking u/announce (u/error "%s is required." dep))
        (swap! results assoc dep true)))))
;; Print maps with plain `{...}` syntax rather than the `#:ns{...}` shorthand.
;; NOTE(review): `set!` on a dynamic var only works where that var is thread-bound (as it is at a
;; REPL) -- confirm this file is only ever evaluated from one.
(set! *print-namespace-maps* false)
;; Warn about reflective Java interop calls in the forms compiled after this point.
(set! *warn-on-reflection* true)
;; Number of worker threads in `pool`, i.e. how many dependency checks can run at the same time.
(def pool-size 8)

;; Fixed-size executor on which the dependency checks are run. `defonce` so that re-evaluating this
;; file reuses the existing pool instead of creating another one.
(defonce ^java.util.concurrent.ThreadPoolExecutor pool
  (java.util.concurrent.Executors/newFixedThreadPool pool-size))
(defn- check-which-are-needed
  "Submit one `check-if-needed` task per entry in `deps` to `pool`, block until every task has
  finished, then pretty-print the accumulated `results` map. Returns nil."
  []
  ;; `mapv` is eager, so every task is queued on the pool before we start waiting. A lazy `for` would
  ;; only submit tasks as the `doseq` below realized them, one 32-element chunk at a time.
  (let [futures (mapv (fn [dep]
                        ;; The `^Runnable` hint picks the `submit(Runnable)` overload without reflection.
                        (.submit pool ^Runnable (fn []
                                                  (check-if-needed dep))))
                      deps)]
    ;; Block on each future in turn; once the last one returns, all checks have completed.
    (doseq [fut futures]
      (deref fut)))
  (clojure.pprint/pprint @results))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The Hive JDBC driver is legitimately evil out of the box. It includes Jetty, Netty, all of Hadoop, all of Hive, Zookeeper, the SQL Server JDBC Driver, Apache Derby DB (similar to H2), JRuby, Log4j, HBase, ANT, Google GSON, Google Protobuf, Jackson, Logback, and literally 240 other things. I programmatically tree shook it. It needs maybe a dozen of the 250 deps it ships with.
Basic idea was:
I ran this 8 deps at a time (it was blowing up my computer if I tried to do any more than that) and it took maybe 20 minutes or so to run through every dep. At that point I made a combined :exclusions list and ran the whole test suite to confirm it.
metabase/metabase#20563