Created
October 31, 2017 11:44
-
-
Save DavZim/411c1fe6c61a395bf179af9150fd42aa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
``` r
library(ggplot2) # data vis | |
library(dplyr) # data manipulation | |
#> | |
#> Attaching package: 'dplyr' | |
#> The following objects are masked from 'package:stats': | |
#> | |
#> filter, lag | |
#> The following objects are masked from 'package:base': | |
#> | |
#> intersect, setdiff, setequal, union | |
library(readr) # data io | |
# Use the light theme as the default for every plot that follows.
theme_set(theme_light())

# Load the raw data from the local CSV file.
df <- read_csv(file = "~/Downloads/dmwe.csv")
#> Parsed with column specification: | |
#> cols( | |
#> countrycode = col_character(), | |
#> SE_HS = col_character(), | |
#> gdppercap = col_character(), | |
#> general = col_character() | |
#> ) | |
# Basic cleaning: the value columns were parsed as character, so coerce them
# to numeric and then derive the two GDP grouping columns.
df <- mutate(
  df,
  SE_HS = as.numeric(SE_HS),
  gdppercap = as.numeric(gdppercap),
  general = as.numeric(general),
  # Bin GDP per capita here rather than inside ggplot() -- easier to debug.
  cut_gdp = cut(gdppercap, breaks = c(-Inf, 15000, Inf)),
  # Simpler alternative: a logical flag that is TRUE when the value is <= 15k.
  cut_gdp2 = gdppercap <= 15000
)
# First plot: colour the points by the pre-computed GDP bin.
df %>%
  ggplot(mapping = aes(x = general, y = SE_HS, colour = cut_gdp)) +
  geom_point(size = 3) +
  scale_colour_manual(
    name = "gdppercap",
    # The names have to match the factor labels, which are written as
    # 1.5e+04 rather than 15000 -- addressing "15000" hits the wrong labels.
    values = c("(-Inf,1.5e+04]" = "black", "(1.5e+04, Inf]" = "red"),
    labels = c("<= 15000", "> 15000")
  )
#> Warning: Removed 14105 rows containing missing values (geom_point). | |
```
![](https://i.imgur.com/ltvxGEK.png)
``` r
# Second plot, this time really without the missing values: drop every row
# that lacks the colour flag or either plotted coordinate. Filtering on
# cut_gdp2 alone still left rows with NA in general / SE_HS, which
# geom_point() then removed with a "Removed ... rows" warning.
df %>%
  filter(!is.na(cut_gdp2), !is.na(general), !is.na(SE_HS)) %>%
  ggplot(aes(x = general, y = SE_HS, color = cut_gdp2)) +
  geom_point(size = 3) +
  scale_color_manual(
    name = "gdppercap",
    # cut_gdp2 is logical, so the scale is keyed by "TRUE" / "FALSE".
    values = c("TRUE" = "black", "FALSE" = "red"),
    labels = c("TRUE" = "<= 15000", "FALSE" = "> 15000")
  )
```
![](https://i.imgur.com/QaGCSgS.png)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment