Skip to content

Instantly share code, notes, and snippets.

@jackbuehner
Last active December 8, 2021 23:59
Show Gist options
  • Save jackbuehner/960c89637542b97c5ec9e668f830f962 to your computer and use it in GitHub Desktop.
Save jackbuehner/960c89637542b97c5ec9e668f830f962 to your computer and use it in GitHub Desktop.
Creates tidy, discrete data from the hessi solar flares data. Also creates five random samples (KEVIN)
##### Install missing dependencies #####
# Only install packages that are not already available, so the script does not
# re-download everything (or fail offline) on every run.
# dplyr is listed because it is loaded with library(dplyr) later in this script.
required_packages <- c("lubridate", "RCT", "RWeka", "dplyr")
is_installed <- vapply(
  required_packages,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
if (any(!is_installed)) {
  install.packages(required_packages[!is_installed])
}
solarFlares <- read.csv("hessi.solar.flare.up_to_2018.csv")
# load lubridate, which can be used to convert HH:MM:SS to seconds
library(lubridate)
##### Remove instances where not solar flare #####
# instances of NS (not solar event) or PS (potential solar event) are when
# position and radial data are not collected (which is bad for analysis)
# NOTE(review): this keeps a row only when x, y AND radial are all non-zero,
# so a flare with exactly one zero coordinate is dropped too - confirm intended.
solarFlares <- subset(
  solarFlares,
  x.pos.asec != 0 & y.pos.asec != 0 & radial != 0
)
##### Convert timestamps to seconds #####
# add a 'start_seconds' column by converting the timestamp
# in the 'start.time' and 'start.date' column to seconds
# (seconds since January 1, 1970)
solarFlares$start_seconds <- with(
  solarFlares,
  (as.numeric(as.Date(start.date)) * 24 * 60 * 60) +
    period_to_seconds(hms(start.time))
)
# Convert a time of day that follows a known start timestamp into absolute
# seconds (counted from January 1, 1970, the origin of as.Date's numeric form).
#
# Arguments:
#   start_date  dates accepted by as.Date(), one per event
#   start_hms   start time of day, parsed with lubridate::hms()
#   next_hms    later time of day (e.g. peak or end), parsed with hms()
#
# Returns a numeric vector: start date in seconds plus next_hms in seconds.
# When next_hms is earlier in the day than start_hms, the event is taken to
# have crossed midnight and one full day of seconds is added.
#
# Works element-wise on whole vectors; zero-length input returns a zero-length
# result, and entries whose times cannot be parsed come back as NA instead of
# aborting the whole conversion.
get_next_time_seconds <- function(start_date, start_hms, next_hms) {
  seconds_per_day <- 24 * 60 * 60
  start_date_seconds <- as.numeric(as.Date(start_date)) * seconds_per_day
  start_time_seconds <- period_to_seconds(hms(start_hms))
  next_time_seconds <- period_to_seconds(hms(next_hms))

  # if next_seconds is less than start_seconds, it is the next day,
  # so add an entire day of seconds to next_seconds
  # (NA comparisons are treated as "no rollover" so they stay NA below)
  crossed_midnight <- !is.na(next_time_seconds) &
    !is.na(start_time_seconds) &
    next_time_seconds < start_time_seconds
  next_time_seconds[crossed_midnight] <-
    next_time_seconds[crossed_midnight] + seconds_per_day

  # next_seconds + the start date in seconds
  next_time_seconds + start_date_seconds
}
# absolute time (in seconds) at which each flare reached its peak
solarFlares[["peak_seconds"]] <- get_next_time_seconds(
  start_date = solarFlares[["start.date"]],
  start_hms = solarFlares[["start.time"]],
  next_hms = solarFlares[["peak"]]
)
# absolute time (in seconds) at which each flare ended
solarFlares[["end_seconds"]] <- get_next_time_seconds(
  start_date = solarFlares[["start.date"]],
  start_hms = solarFlares[["start.time"]],
  next_hms = solarFlares[["end"]]
)
library(dplyr)
##### Remove undesired columns #####
# the raw date/time columns are no longer needed once the *_seconds
# columns have been derived from them
solarFlares <- select(
  solarFlares,
  -one_of("start.date", "start.time", "peak", "end")
)
##### Rename existing columns #####
# new_name = old_name; column positions are left unchanged
solarFlares <- solarFlares %>%
  rename(
    duration_seconds = duration.s,
    total_photons = total.counts,
    position_arc_seconds.x = x.pos.asec,
    position_arc_seconds.y = y.pos.asec,
    sun_region = active.region.ar,
    peak_max_photons = peak.c.s,
    highest_energy_discrete = energy.kev
  )
##### Determine time to peak #####
# elapsed seconds between the start of a flare and its peak
solarFlares[["seconds_to_peak"]] <-
  solarFlares[["peak_seconds"]] - solarFlares[["start_seconds"]]
##### Discretize seconds #####
library(RCT)
# bin each continuous column into 10 labelled groups with ntile_label();
# the i-th source column below feeds the i-th *_discrete column
solarFlares[c(
  "seconds_to_peak_discrete",
  "duration_discrete",
  "peak_max_photons_discrete",
  "total_photons_discrete"
)] <- unname(lapply(
  solarFlares[c(
    "seconds_to_peak",
    "duration_seconds",
    "peak_max_photons",
    "total_photons"
  )],
  ntile_label,
  10
))
##### Export changes to csv and arff #####
library(RWeka)
# write the full cleaned data set in both formats
write.csv(x = solarFlares, file = "solar_flares_before_2018-03-03.csv")
write.arff(x = solarFlares, file = "solar_flares_before_2018-03-03.arff")
##### Create five random samples #####
# Seed the RNG, draw 25000 rows from `data`, write them to `path` as arff,
# and hand the sampled rows back to the caller.
draw_sample <- function(data, seed, path) {
  set.seed(seed)
  rows <- sample_n(data, 25000)
  write.arff(rows, path)
  rows
}
# one fixed seed per sample so every file can be reproduced
k <- draw_sample(solarFlares, 11, "sample_k.arff")
e <- draw_sample(solarFlares, 5, "sample_e.arff")
v <- draw_sample(solarFlares, 22, "sample_v.arff")
i <- draw_sample(solarFlares, 9, "sample_i.arff")
n <- draw_sample(solarFlares, 14, "sample_n.arff")
@jackbuehner
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment