Skip to content

Instantly share code, notes, and snippets.

@IvanIvanoff
Created August 10, 2021 13:15
Show Gist options
  • Save IvanIvanoff/ed6a1f64ff0e5c224f121eb80b9911a5 to your computer and use it in GitHub Desktop.
Save IvanIvanoff/ed6a1f64ff0e5c224f121eb80b9911a5 to your computer and use it in GitHub Desktop.
Mix.install([:nimble_csv])
defmodule Transform do
  @moduledoc """
  Benchmark script that fans one source CSV out into a single combined CSV.

  For every `{slug, metric}` pair the source file is read again, each row is
  tagged with the slug and the metric and given a random value, and the
  resulting lines are appended to `combined_metrics_file.csv` (relative to the
  current working directory).
  """

  @slugs_top_100 ~w(bitcoin ethereum tether binance-coin cardano ripple dogecoin usd-coin polkadot-new uniswap binance-usd bitcoin-cash solana litecoin chainlink internet-computer wrapped-bitcoin ethereum-classic matic-network stellar theta vechain file-coin luna multi-collateral-dai tron aave monero ftx-token eos pancakeswap crypto-com-coin bitcoin-bep2 the-graph maker axie-infinity neo cosmos klaytn shiba-inu bitcoin-sv avalanche tezos algorand unus-sed-leo elrond-egld iota amp compound bittorrent huobi-token hedera-hashgraph terrausd decred kusama chiliz waves quant dash blockstack zcash nem thorchain theta-fuel holo celsius okb helium decentraland enjin-coin sushi yearn-finance trueusd ravencoin near-protocol xinfin-network synthetix-network-token flow zilliqa nexo basic-attention-token qtum bitcoin-gold telcoin harmony bancor paxos-standard ethos kucoin-shares celo digibyte siacoin ontology 0x mdex zencash ankr fantom swissborg curve)
  @metrics ~w(active_addresses_24h transaction_volume circulation_1d exchange_inflow exchange_outflow exchange_balance active_deposits_5m age_consumed)

  @doc """
  Executes `run/0` and prints how long the whole run took, in seconds.

  Returns `:ok` (the result of the final `IO.puts/1`).
  """
  @spec timed_run() :: :ok
  def timed_run do
    started_at = System.monotonic_time(:millisecond)
    run()
    finished_at = System.monotonic_time(:millisecond)

    IO.puts(
      "Full run for #{length(@slugs_top_100) * length(@metrics)} files took: #{(finished_at - started_at) / 1000} seconds"
    )
  end

  @doc """
  Reads `data_files/2015_5.csv` from the current working directory and returns
  its rows as `[datetime, value]` pairs.

  The file is parsed with `NimbleCSV.RFC4180.parse_string/2` using
  `skip_headers: true`. Every parsed row must have exactly three columns; the
  third one is dropped. A row of any other width raises `FunctionClauseError`,
  and a missing or unreadable file raises `File.Error`.
  """
  @spec get_csv() :: [[String.t()]]
  def get_csv do
    # The CSV is deliberately read on every call to simulate the real world,
    # where there would be a different file to read for every pair.
    [File.cwd!(), "data_files", "2015_5.csv"]
    |> Path.join()
    |> File.read!()
    |> NimbleCSV.RFC4180.parse_string(skip_headers: true)
    |> Enum.map(fn [dt, value, _] -> [dt, value] end)
  end

  @doc """
  Appends the rows for every `{slug, metric}` combination to
  `combined_metrics_file.csv` and prints the time spent on each combination.

  The output file is opened once in append mode with delayed writes and is
  always closed before returning, even when a step raises, so buffered data is
  flushed and the file handle is not leaked.

  Returns a list with one `:ok` per processed combination.
  """
  @spec run() :: [:ok]
  def run do
    output_file = File.open!("combined_metrics_file.csv", [:append, {:delayed_write, 1000, 20}])

    try do
      for slug <- @slugs_top_100, metric <- @metrics do
        started_at = System.monotonic_time(:millisecond)
        append_rows(output_file, slug, metric)
        finished_at = System.monotonic_time(:millisecond)

        IO.puts("Handle file for #{slug} #{metric} took: #{finished_at - started_at}ms")
      end
    after
      # With :delayed_write, pending data is only guaranteed to reach the file
      # once it is closed. The result is not matched here so that a close
      # failure cannot mask an exception raised by the body.
      File.close(output_file)
    end
  end

  # Writes one "dt,slug,metric,value" line to `output_file` per source row.
  defp append_rows(output_file, slug, metric) do
    Enum.each(get_csv(), fn [dt, _value] ->
      # Put a random value in the CSV to simulate different data for different
      # pairs. This also makes the data less compressible, so it represents
      # the worst-case scenario.
      random_value = :rand.uniform() * 10_000_000

      # iodata avoids building an intermediate joined string for every line.
      IO.binwrite(output_file, [dt, ",", slug, ",", metric, ",", to_string(random_value), "\n"])
    end)
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment