Last active
January 13, 2018 13:12
-
-
Save JoeUnsung/8cf6a5b2c4dd93c15d619863f555db63 to your computer and use it in GitHub Desktop.
5 main verbs in dplyr
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##
## dplyr practice: the 5 core verbs (select, mutate, filter, arrange, summarise)
##
## 1 select - works much like a SQL SELECT; positional ranges such as 1:4
##            are accepted as well
buying_no_shopping %>%
  select(ID, BIZ_UNIT)

custo %>%
  select(ID, GENDER, HOM_PST_NO)

## Selecting by a range of column positions: columns 1 through ncol(custo) - 1
custo %>%
  select(1:(ncol(custo) - 1))

buying_shopping %>%
  select(ID, RCT_NO, PD_S_C)
## 2 mutate - used to create a new derived variable
##            (new_column = expression built from existing columns)
glimpse(custo)

custo %>%
  ## Label rows where GENDER == 1 with the first string, all others with the second
  mutate(GENDER_KR = ifelse(GENDER == 1, "남자", "여자")) %>%
  ## Keep columns 1 and 3-5 by position, followed by GENDER.
  ## NOTE(review): whether GENDER_KR falls inside positions 3:5 depends on how
  ## many columns custo has - confirm it is actually retained.
  select(1, 3:5, GENDER)
## 3 filter - plays the role of a SQL WHERE; conditions can be joined with `&`,
##            and separating them with `,` works too
custo %>%
  filter(
    GENDER == 1,
    HOM_PST_NO > 100
  ) %>%
  ## OVER is computed here but not used by the summarise() below,
  ## which returns only the row count n
  mutate(OVER = HOM_PST_NO / 10) %>%
  summarise(n = n())
## 4 summarise - combined with group_by() to apply aggregation functions per group
custo %>%
  ## OVER is not referenced by the later steps and is dropped by summarise()
  mutate(OVER = HOM_PST_NO / 10) %>%
  group_by(AGE_PRD) %>%
  ## Per AGE_PRD group: sum of GENDER divided by the number of rows in the group
  summarise(avg_cnt = sum(GENDER) / n()) %>%
  ## Rank the per-group values, then list the highest rank first
  mutate(rnk = rank(avg_cnt)) %>%
  arrange(desc(rnk))

## summarise() is the verb to use with aggregation functions.
## The output column name is assigned with `=`, similar to AS in SQL.
## Use it for count / avg / sum style calculations.
## 5 arrange - wrapping a column in desc() sorts in descending order
custo %>%
  ## OVER is not referenced by the later steps and is dropped by summarise()
  mutate(OVER = HOM_PST_NO / 10) %>%
  group_by(AGE_PRD) %>%
  ## Per AGE_PRD group: sum of GENDER divided by the number of rows in the group
  summarise(avg_cnt = sum(GENDER) / n()) %>%
  mutate(rnk = rank(avg_cnt)) %>%
  ## Sort so that the largest rnk comes first
  arrange(desc(rnk))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment