library(db2pq)
library(dplyr, warn.conflicts = FALSE)
library(farr, warn.conflicts = FALSE)
wrds_update_pq("stksecurityinfohist", "crsp", force = TRUE)
#> Forcing update based on user request.
#> Beginning file download at 2026-03-21 11:15:00 UTC.
#> Completed file download at 2026-03-21 11:15:10 UTC.
wrds_update_pq("dsf_v2", "crsp")
#> crsp.dsf_v2 already up to date.
db <- DBI::dbConnect(duckdb::duckdb())
dsf_db <- farr::load_parquet(db, "dsf_v2", "crsp")
stksecurityinfohist_db <- farr::load_parquet(db, "stksecurityinfohist", "crsp")
crsp_daily_sub <-
dsf_db |>
# SELECTION DATE
# filter(between(dlycaldt, start_date, end_date)) |>
inner_join(
stksecurityinfohist_db |>
# no special share types (sharetype = 'NS')
filter(sharetype == "NS",
# security type equity (securitytype = 'EQTY')
securitytype == "EQTY",
# security sub type common stock (securitysubtype = 'COM'),
securitysubtype == "COM",
# only US-listed stocks
usincflg == "Y",
# issuers that are a corporation
# (issuertype %in% c("ACOR", "CORP"))
issuertype %in% c("ACOR", "CORP"),
# we use only stock prices from NYSE, Amex, and
# NASDAQ (primaryexch %in% c("N", "A", "Q"))
primaryexch %in% c("N", "A", "Q"),
# when or after issuance (conditionaltype %in% c("RW", "NW"))
conditionaltype %in% c("RW", "NW"),
# for actively traded stocks (tradingstatusflg == "A")
tradingstatusflg == "A") |>
select(permno, secinfostartdt, secinfoenddt),
join_by(permno)
) |>
# (iv) we keep only months within permno-specific start dates
# (secinfostartdt) and end dates (secinfoenddt)
filter(between(dlycaldt, secinfostartdt, secinfoenddt)) |>
mutate(
mktcap = (1000 * shrout / dlycumfacshr) * dlyprc / 10^6,
mktcap = na_if(mktcap, 0),
dlyprc = dlyprc/ dlycumfacpr
)
crsp_daily_sub |> count()
#> # Source: SQL [?? x 1]
#> # Database: DuckDB 1.4.4 [root@Darwin 25.4.0:R 4.5.2/:memory:]
#> n
#> <dbl>
#> 1 81665021Created on 2026-03-21 with reprex v2.1.1
Created on 2026-03-21 with reprex v2.1.1