Last active
May 23, 2017 21:50
-
-
Save infotroph/ebf43d986f06c1c266275f23fc1262fc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ``` | |
| library("PEcAn.data.atmosphere") | |
| library("dplyr") | |
| # Fetch 2016-03-01..2016-03-31 Geostreams data for the NE, SE and CEN Energy Farm sites into ~/gstest. | |
| # The commented lines after each call are the log output recorded when this was originally run. | |
| ne <- download.Geostreams( | |
|   outfolder = "~/gstest", | |
|   sitename = "UIUC Energy Farm - NE", | |
|   start_date = "2016-03-01", | |
|   end_date = "2016-03-31" | |
| ) | |
| # 2017-05-23 15:07:59 INFO [download.Geostreams] : | |
| # https://terraref.ncsa.illinois.edu/clowder/api/geostreams/datapoints?sensor_id=411&since=2016-03-01&until=2016-03-31 | |
| se <- download.Geostreams( | |
|   outfolder = "~/gstest", | |
|   sitename = "UIUC Energy Farm - SE", | |
|   start_date = "2016-03-01", | |
|   end_date = "2016-03-31" | |
| ) | |
| # 2017-05-23 15:09:00 INFO [download.Geostreams] : | |
| # https://terraref.ncsa.illinois.edu/clowder/api/geostreams/datapoints?sensor_id=410&since=2016-03-01&until=2016-03-31 | |
| cen <- download.Geostreams( | |
|   outfolder = "~/gstest", | |
|   sitename = "UIUC Energy Farm - CEN", | |
|   start_date = "2016-03-01", | |
|   end_date = "2016-03-31" | |
| ) | |
| # 2017-05-23 15:09:16 INFO [download.Geostreams] : | |
| # https://terraref.ncsa.illinois.edu/clowder/api/geostreams/datapoints?sensor_id=412&since=2016-03-01&until=2016-03-31 | |
| # Show file metadata for everything in the download folder, using full paths. | |
| # `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned. | |
| file.info(list.files("~/gstest", full.names = TRUE)) | |
| # size | |
| # /home/a-m/black11/gstest/Clowder.UIUC Energy Farm - CEN.2016-03-01.2016-03-31.2016.json 53357824 | |
| # /home/a-m/black11/gstest/Clowder.UIUC Energy Farm - NE.2016-03-01.2016-03-31.2016.json 1694881 | |
| # /home/a-m/black11/gstest/Clowder.UIUC Energy Farm - SE.2016-03-01.2016-03-31.2016.json 3377084 | |
| # ...snipped... | |
| # Read each downloaded JSON file, simplifying and flattening the parsed result. | |
| ne_parsed <- jsonlite::read_json( | |
|   ne$file, | |
|   simplifyVector = TRUE, | |
|   flatten = TRUE | |
| ) | |
| se_parsed <- jsonlite::read_json( | |
|   se$file, | |
|   simplifyVector = TRUE, | |
|   flatten = TRUE | |
| ) | |
| cen_parsed <- jsonlite::read_json( | |
|   cen$file, | |
|   simplifyVector = TRUE, | |
|   flatten = TRUE | |
| ) | |
| # Does CEN have more timepoints, or are some timepoints duplicated? | |
| #' Count rows and distinct ids, start times and end times. | |
| #' | |
| #' @param data A data frame with columns `id`, `start_time` and `end_time`. | |
| #' @return The result of `summarize()` with columns `n` (row count), | |
| #'   `n_id`, `n_start` and `n_end` (distinct counts of the matching columns). | |
| dups <- function(data) { | |
|   summarize( | |
|     data, | |
|     n = n(), | |
|     n_id = n_distinct(id), | |
|     n_start = n_distinct(start_time), | |
|     n_end = n_distinct(end_time) | |
|   ) | |
| } | |
| # Row and distinct-value counts per site; commented lines are the recorded output. | |
| ne_parsed$data %>% dups() | |
| # n n_id n_start n_end | |
| # 1 2876 2876 2876 2876 | |
| se_parsed$data %>% dups() | |
| # n n_id n_start n_end | |
| # 1 5752 5752 2876 2876 | |
| cen_parsed$data %>% dups() | |
| # n n_id n_start n_end | |
| # 1 94908 94908 2876 2876 | |
| # ==> SE has two of each timepoint, and CEN has 33(!) of each timepoint. | |
| # Is the whole timepoint duplicated, or only some parameters? | |
| #' Does a vector hold more than one distinct value? | |
| #' | |
| #' @param x A vector. | |
| #' @return `TRUE` if `unique(x)` has more than one element, otherwise `FALSE`. | |
| varies <- function(x) { | |
|   distinct_values <- unique(x) | |
|   length(distinct_values) > 1 | |
| } | |
| #' Flag the columns whose values differ among rows sharing a `start_time`. | |
| #' | |
| #' Uses `summarise(across(...))` in place of the deprecated | |
| #' `summarize_each(funs(...))`. | |
| #' | |
| #' @param data A data frame with a `start_time` column. | |
| #' @return A summarised data frame with one logical column per non-`start_time` | |
| #'   column of `data`: `TRUE` if that column varies within at least one | |
| #'   `start_time` group. | |
| varies_within_time <- function(data) { | |
|   data %>% | |
|     group_by(start_time) %>% | |
|     # Per timepoint: does each column take more than one value? | |
|     # (across() skips the grouping column inside a grouped summarise.) | |
|     summarise(across(everything(), varies), .groups = "drop") %>% | |
|     # Across timepoints: did the column vary in any of them? | |
|     summarise(across(-start_time, any)) | |
| } | |
| # Keep only the columns flagged TRUE for each site; commented lines are the recorded output. | |
| ne_parsed$data %>% varies_within_time() %>% unlist() %>% .[.] | |
| # named logical(0) | |
| se_parsed$data %>% varies_within_time() %>% unlist() %>% .[.] | |
| # id created | |
| # TRUE TRUE | |
| cen_parsed$data %>% varies_within_time() %>% unlist() %>% .[.] | |
| # id created | |
| # TRUE TRUE | |
| # ==> in SE and CEN, each row has its own ID and the time created varies, but the met data are duplicated within each timepoint. | |
| ``` |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment