Created
September 5, 2024 06:37
-
-
Save YoRyan/cd6b803a54d35e57e6013ae43f08b9b4 to your computer and use it in GitHub Desktop.
Read data from the FTA's National Transit Database into Deedle with F#.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#r "nuget: Deedle" | |
#r "nuget: ExcelDataReader" | |
#r "nuget: ExcelDataReader.DataSet" | |
open Deedle | |
open ExcelDataReader | |
type Reporter = | |
{ NtdId: string; | |
Mode: string; | |
TypeOfService: string } | |
let ntdDataSet = | |
use stream = File.Open("June 2024 Complete Monthly Ridership (with adjustments and estimates)_240801.xlsx", FileMode.Open, FileAccess.Read) | |
use reader = ExcelReaderFactory.CreateReader(stream) | |
reader.AsDataSet() | |
let ntdMasterTable = | |
seq { for row in ntdDataSet.Tables["Master"].Rows -> row } | |
|> Seq.skip 1 | |
|> Seq.map (fun row -> | |
let readS (idx: int) = | |
match row.Item idx with | |
| :? System.String as s -> s | |
| _ -> "" | |
let readN (idx: int) = | |
match row.Item idx with | |
| :? System.Double as d -> d | |
| _ -> 0. | |
let key = { NtdId = readS 0; Mode = readS 3; TypeOfService = readS 4 } | |
let value = | |
{| Agency = readS 2 | |
UzaName = readS 12 | |
UzaSqMiles = readN 13 | |
UzaPopulation = readN 14 | |
ServiceAreaPopulation = readN 15 | |
ServiceAreaSqMiles = readN 16 | |
PassengerMilesFy = readN 20 | |
UnlinkedPassengerTripsFy = readN 21 | |
AverageTripLengthFy = readN 22 | |
FaresFy = readN 23 | |
OperatingExpensesFy = readN 24 | |
AverageCostPerTripFy = readN 25 | |
AverageFaresPerTripFy = readN 26 |} | |
(key, value)) | |
|> Map.ofSeq | |
let readNtdMonthlyTable (sheet: string) = | |
let ntdDatesSeq = | |
(fun index -> DateTime(2002 + (index / 12), 1 + (index % 12), 1)) | |
|> Seq.initInfinite | |
|> Seq.cache | |
let readNtdSeries (row: Data.DataRow): Series<DateTime, float> = | |
Seq.zip ntdDatesSeq (Seq.cast<float> row.ItemArray[10..]) | |
|> Seq.map Collections.Generic.KeyValuePair | |
|> Series | |
seq { for row in ntdDataSet.Tables[sheet].Rows -> row } | |
|> Seq.skip 1 | |
|> Seq.choose (fun row -> | |
let id = string row[0] | |
if id = "" then None | |
else Some ({ NtdId = id; Mode = string row[7]; TypeOfService = string row[8] }, readNtdSeries row)) | |
|> Frame.ofRows | |
let ntdUnlinkedPassengerTrips = readNtdMonthlyTable "UPT" | |
let ntdVehicleRevenueMiles = readNtdMonthlyTable "VRM" | |
let ntdVehicleRevenueHours = readNtdMonthlyTable "VRH" | |
let ntdVehiclesOperatingMaxService = readNtdMonthlyTable "VOMS" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment