Last active
December 8, 2021 23:59
-
-
Save jackbuehner/960c89637542b97c5ec9e668f830f962 to your computer and use it in GitHub Desktop.
Creates tidy, discretized data from the HESSI solar flares data. Also creates five random samples named k, e, v, i, and n ("KEVIN").
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##### Install any missing packages and load the data #####
# Only install packages that are not already available, so re-running the
# script does not re-download everything. dplyr is listed as well because the
# script loads it further down with library(dplyr).
required_packages <- c("lubridate", "RCT", "RWeka", "dplyr")
missing_packages <- required_packages[
  !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

solarFlares <- read.csv("hessi.solar.flare.up_to_2018.csv")

# load lubridate, which is used to convert HH:MM:SS to seconds
library(lubridate)

##### Remove instances where not solar flare #####
# instances of NS (not solar event) or PS (potential solar event) are when
# position and radial data are not collected (which is bad for analysis).
# Only rows where x position, y position and radial are ALL non-zero are kept.
solarFlares <- subset(
  solarFlares,
  x.pos.asec != 0 & y.pos.asec != 0 & radial != 0
)

##### Convert timestamps to seconds #####
# add a 'start_seconds' column by converting the timestamp
# in the 'start.time' and 'start.date' columns to seconds
# (seconds since January 1, 1970): whole days from the date, converted to
# seconds, plus the time-of-day offset in seconds
solarFlares$start_seconds <- with(
  solarFlares,
  (as.numeric(as.Date(start.date)) * 24 * 60 * 60) +
    period_to_seconds(hms(start.time))
)
#' Convert a later time-of-day to absolute seconds, handling midnight rollover
#'
#' For each element, `next_hms` is a time of day that occurs at or after
#' `start_hms` on `start_date`. If `next_hms` is earlier in the day than
#' `start_hms`, it is treated as falling on the day after `start_date`.
#'
#' Requires lubridate (`hms()`, `period_to_seconds()`) to be attached.
#'
#' @param start_date Vector of start dates accepted by `as.Date()`.
#' @param start_hms Vector of start times as "HH:MM:SS" strings.
#' @param next_hms Vector of later times as "HH:MM:SS" strings; expected to
#'   be the same length as `start_hms`.
#' @return Numeric vector: `next_hms` expressed as seconds since
#'   January 1, 1970. Elements whose times cannot be parsed are `NA`.
get_next_time_seconds <- function(start_date, start_hms, next_hms) {
  seconds_per_day <- 24 * 60 * 60

  start_date_seconds <- as.numeric(as.Date(start_date)) * seconds_per_day
  start_time_seconds <- period_to_seconds(hms(start_hms))
  next_time_seconds <- period_to_seconds(hms(next_hms))

  # if next_seconds is less than start_seconds, it is the next day, so add an
  # entire day of seconds (next time is on the day after start_date).
  # Vectorized so empty input yields an empty result, and NA-guarded so an
  # unparseable time propagates as NA instead of aborting the whole run.
  rolls_over <- !is.na(next_time_seconds) &
    !is.na(start_time_seconds) &
    next_time_seconds < start_time_seconds
  next_time_seconds[rolls_over] <-
    next_time_seconds[rolls_over] + seconds_per_day

  # next time-of-day offset + the start date in seconds
  next_time_seconds + start_date_seconds
}
# Absolute peak time of each flare in seconds; a peak time-of-day earlier
# than the start time is placed on the following day
solarFlares$peak_seconds <- with(
  solarFlares,
  get_next_time_seconds(start.date, start.time, peak)
)

# Absolute end time of each flare in seconds, computed the same way
solarFlares$end_seconds <- with(
  solarFlares,
  get_next_time_seconds(start.date, start.time, end)
)
##### Remove undesired columns #####
library(dplyr)

# The raw date/time columns are dropped here.
# all_of() replaces the superseded one_of(); it fails loudly if any of the
# listed columns is missing instead of only warning.
solarFlares <- solarFlares %>%
  select(-all_of(c("start.date", "start.time", "peak", "end")))

##### Rename existing columns #####
# One rename() call instead of seven passes over the data frame
# (new_name = old_name).
solarFlares <- solarFlares %>%
  rename(
    duration_seconds = duration.s,
    total_photons = total.counts,
    position_arc_seconds.x = x.pos.asec,
    position_arc_seconds.y = y.pos.asec,
    sun_region = active.region.ar,
    peak_max_photons = peak.c.s,
    highest_energy_discrete = energy.kev
  )
##### Determine time to peak #####
# Elapsed seconds between the flare start and its peak
solarFlares[["seconds_to_peak"]] <-
  solarFlares[["peak_seconds"]] - solarFlares[["start_seconds"]]

##### Discretize seconds #####
library(RCT)

# Maps each new discretized column (name) to the numeric column it is built
# from (value); order here is the order the columns are appended.
discretized_sources <- c(
  seconds_to_peak_discrete = "seconds_to_peak",
  duration_discrete = "duration_seconds",
  peak_max_photons_discrete = "peak_max_photons",
  total_photons_discrete = "total_photons"
)

# Each source column is passed to ntile_label() with 10 groups
for (discrete_name in names(discretized_sources)) {
  source_name <- discretized_sources[[discrete_name]]
  solarFlares[[discrete_name]] <- ntile_label(solarFlares[[source_name]], 10)
}
##### Export changes to csv and arff #####
# RWeka provides write.arff()
library(RWeka)

# Write the full cleaned data set in both formats.
# NOTE(review): write.csv() keeps its default row.names = TRUE here, so the
# CSV gets a leading row-name column — confirm that is wanted downstream.
write.csv(solarFlares, file = "solar_flares_before_2018-03-03.csv")
write.arff(solarFlares, file = "solar_flares_before_2018-03-03.arff")
##### Create five random samples #####
# Seeds the RNG, draws `size` rows from `data` with sample_n(), writes them to
# `path` as ARFF, and returns the sampled rows so they can be kept.
write_seeded_sample <- function(data, seed, path, size = 25000) {
  set.seed(seed)
  drawn <- sample_n(data, size)
  write.arff(drawn, path)
  drawn
}

# Five samples of 25000 rows each; every sample has its own fixed seed so the
# draws are reproducible.
k <- write_seeded_sample(solarFlares, 11, "sample_k.arff")
e <- write_seeded_sample(solarFlares, 5, "sample_e.arff")
v <- write_seeded_sample(solarFlares, 22, "sample_v.arff")
i <- write_seeded_sample(solarFlares, 9, "sample_i.arff")
n <- write_seeded_sample(solarFlares, 14, "sample_n.arff")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@medenton