Last active
January 15, 2023 09:36
-
-
Save TomBener/c3ebeb2c4da66d2a9a146c6925fe077b to your computer and use it in GitHub Desktop.
WeChat Moments Analysis with R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# WeChat Moments Analysis with R
#
# Install any missing package first with
# `install.packages("package-name")` # nolint
#
# Using Python in R Markdown (left disabled):
# library(reticulate)
# use_python("~/.pyenv/shims/python")

# Import packages ----
library(tufte)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggrepel)
library(showtext)

# Register the font file under the family name "lxgw"; every plot in this
# script requests that family for its text. The path is relative, so the
# .ttf file is looked up from the current working directory / font paths.
font_add("lxgw", "LXGWWenKai-Regular.ttf")
showtext_auto()

# Load the data ----
data <- read.csv("moments.csv")

# `medium` can hold several values in one cell; split on a comma followed by
# one whitespace character so that each medium gets its own row.
sepdata <- separate_rows(data, medium, sep = ",\\s")
# Moments visibility duration ----
# Pie chart of `time_of_visibility`. Each (name, time_of_visibility) pair is
# counted once, so a person with several posts is not counted repeatedly.
visibility_share <- data %>%
  distinct(name, time_of_visibility) %>%
  count(time_of_visibility) %>%
  mutate(
    perc = n / sum(n),
    labels = scales::percent(perc)
  )

# Explicit slice/legend order; values not listed here become NA in the factor.
visibility_levels <- c("三天", "一个月", "半年", "无时限")

visibility_share %>%
  ggplot(aes(
    x = "",
    y = n,
    fill = factor(time_of_visibility, levels = visibility_levels)
  )) +
  geom_col() +
  # Percentage label centred inside each stacked segment
  geom_label(
    aes(label = labels),
    position = position_stack(vjust = 0.5),
    size = 3,
    family = "lxgw",
    show.legend = FALSE
  ) +
  # A single stacked bar drawn in polar coordinates is a pie chart
  coord_polar(theta = "y") +
  guides(fill = guide_legend(title = "朋友圈可见时长")) +
  theme_void() +
  theme(text = element_text(family = "lxgw"))
# Who is active on Moments (by relationship with me) ----
# Pie chart of `relationship_with_me` with repelled percentage labels.
relationship_share <- data %>%
  count(relationship_with_me) %>%
  mutate(
    # Explicit factor: the row order used for the label positions below and
    # the stacking order used by ggplot are then derived from the same levels.
    # Sorting the raw strings with arrange() may use a different collation
    # than the one that orders the fill scale.
    relationship_with_me = factor(relationship_with_me),
    perc = n / sum(n),
    labels = scales::percent(perc)
  ) %>%
  # The stack is built from the last level upwards, so walk the levels in
  # descending order and place each label at the middle of its segment.
  arrange(desc(relationship_with_me)) %>%
  mutate(text_y = cumsum(n) - n / 2)

relationship_share %>%
  ggplot(aes(x = "", y = n, fill = relationship_with_me)) +
  geom_col() +
  # reference: https://stackoverflow.com/a/69715619
  geom_label_repel(
    aes(label = labels, y = text_y),
    force = 0.5,
    nudge_x = 1,
    nudge_y = 0.5,
    size = 3,
    show.legend = FALSE,
    family = "lxgw"
  ) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Pastel1") +
  guides(fill = guide_legend(title = NULL)) +
  theme_void() +
  theme(text = element_text(family = "lxgw"))
# Number of posts per person ----
# Horizontal bar chart, one bar per `name`, ordered by post count. The names
# themselves are hidden from the axis.
posts_per_name <- data %>%
  count(name)

ggplot(
  data = posts_per_name,
  aes(x = n, y = reorder(name, n), fill = name)
) +
  geom_col() +
  # NOTE(review): ticks are hard-coded to 0..18; larger counts would get no
  # tick mark. Confirm against the data.
  scale_x_continuous(breaks = seq(0, 18, 1)) +
  theme_minimal() +
  labs(x = "发朋友圈次数", y = "隐藏的姓名") +
  theme(
    text = element_text(family = "lxgw"),
    axis.text.y = element_blank(),
    legend.position = "none"
  )
# Media types posted ----
# Pie chart of `medium`, based on `sepdata` (one row per medium per post).
medium_share <- sepdata %>%
  count(medium) %>%
  mutate(
    # Explicit factor so that label ordering and stacking order share the
    # same levels regardless of how arrange() collates strings.
    medium = factor(medium),
    perc = n / sum(n),
    labels = scales::percent(perc)
  ) %>%
  # The stack is built from the last level upwards; accumulate in that order
  # and put each label at the midpoint of its segment.
  arrange(desc(medium)) %>%
  mutate(text_y = cumsum(n) - n / 2)

medium_share %>%
  ggplot(aes(x = "", y = n, fill = medium)) +
  geom_col() +
  geom_label_repel(
    aes(label = labels, y = text_y),
    force = 0.5,
    nudge_x = 0.8,
    nudge_y = 0.5,
    size = 3,
    show.legend = FALSE,
    family = "lxgw"
  ) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Pastel2") +
  guides(fill = guide_legend(title = NULL)) +
  theme_void() +
  theme(text = element_text(family = "lxgw"))
# Posts per day ----
# Aggregate once (one row per `day`) and reuse the table for both charts, so
# the line chart does not draw one overlapping point per post.
posts_per_day <- data %>%
  count(day)

# Horizontal bars, ordered by number of posts
ggplot(
  data = posts_per_day,
  aes(x = n, y = reorder(day, n), fill = day)
) +
  geom_col() +
  # NOTE(review): ticks are hard-coded to 0..26 in steps of 4 — confirm
  # against the data.
  scale_x_continuous(breaks = seq(0, 26, 4)) +
  theme_minimal() +
  labs(x = "每天的发布次数") +
  theme(
    text = element_text(family = "lxgw"),
    axis.title.y = element_blank(),
    legend.position = "none"
  )

# Line chart of the same counts; `group = 1` joins all days into one line
ggplot(
  data = posts_per_day,
  aes(x = day, y = n, group = 1)
) +
  geom_line(color = "blue") +
  geom_point() +
  scale_y_continuous(breaks = seq(0, 26, 4)) +
  theme_minimal() +
  # NOTE(review): this text is set as the x-axis title although the counts
  # are on the y axis — confirm it is intended as a caption.
  labs(x = "每天的发布次数") +
  theme(
    text = element_text(family = "lxgw"),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  )
# Distribution over the hours of the day ----
# Aggregate once (one row per `hour`) and reuse the table for both charts.
posts_per_hour <- data %>%
  count(hour)

# Horizontal bars, ordered by number of posts
ggplot(
  data = posts_per_hour,
  aes(x = n, y = reorder(hour, n), fill = hour)
) +
  geom_col() +
  # NOTE(review): ticks are hard-coded to 0..26 in steps of 4 — confirm
  # against the data.
  scale_x_continuous(breaks = seq(0, 26, 4)) +
  theme_minimal() +
  labs(x = "一天中的时间分布") +
  theme(
    text = element_text(family = "lxgw"),
    axis.title.y = element_blank(),
    legend.position = "none"
  )

# Line chart of the same counts; `group = 1` joins all hours into one line
ggplot(
  data = posts_per_hour,
  aes(x = hour, y = n, group = 1)
) +
  geom_line(color = "blue") +
  geom_point() +
  scale_y_continuous(breaks = seq(0, 26, 4)) +
  theme_minimal() +
  labs(x = "一天中的时间分布") +
  theme(
    text = element_text(family = "lxgw"),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  )
# Context ----
# Pie chart of `context` with a percentage label inside each slice.
context_share <- data %>%
  count(context) %>%
  mutate(
    perc = n / sum(n),
    labels = scales::percent(perc)
  )

context_share %>%
  ggplot(aes(x = "", y = n, fill = context)) +
  geom_col() +
  # Label centred inside each stacked segment
  geom_label(
    aes(label = labels),
    position = position_stack(vjust = 0.5),
    size = 3,
    family = "lxgw",
    show.legend = FALSE
  ) +
  coord_polar(theta = "y") +
  guides(fill = guide_legend(title = "背景信息")) +
  theme_void() +
  theme(text = element_text(family = "lxgw"))
# Property ----
# Pie chart of `property` with a percentage label inside each slice.
property_share <- data %>%
  count(property) %>%
  mutate(
    perc = n / sum(n),
    labels = scales::percent(perc)
  )

property_share %>%
  ggplot(aes(x = "", y = n, fill = property)) +
  geom_col() +
  # Label centred inside each stacked segment
  geom_label(
    aes(label = labels),
    position = position_stack(vjust = 0.5),
    size = 3,
    family = "lxgw",
    show.legend = FALSE
  ) +
  coord_polar(theta = "y") +
  guides(fill = guide_legend(title = "属性")) +
  theme_void() +
  theme(text = element_text(family = "lxgw"))
# Emotion ----
# Pie chart of `emotional_level` with a percentage label inside each slice.
emotion_share <- data %>%
  count(emotional_level) %>%
  mutate(
    perc = n / sum(n),
    labels = scales::percent(perc)
  )

emotion_share %>%
  ggplot(aes(x = "", y = n, fill = emotional_level)) +
  geom_col() +
  # Label centred inside each stacked segment (slightly smaller text here)
  geom_label(
    aes(label = labels),
    position = position_stack(vjust = 0.5),
    size = 2.5,
    family = "lxgw",
    show.legend = FALSE
  ) +
  coord_polar(theta = "y") +
  guides(fill = guide_legend(title = "情绪")) +
  theme_void() +
  theme(text = element_text(family = "lxgw"))
# Number of images ----
# One aggregated table (one row per `number_of_images`) feeds both charts.
image_counts <- data %>%
  count(number_of_images) %>%
  mutate(
    perc = n / sum(n),
    labels = scales::percent(perc)
  )

# Bar chart: categories ordered from most to least frequent
ggplot(
  data = image_counts,
  aes(
    x = reorder(number_of_images, -n),
    y = n,
    fill = factor(number_of_images)
  )
) +
  geom_col() +
  # NOTE(review): ticks are hard-coded to 0..100 in steps of 10 — confirm
  # against the data.
  scale_y_continuous(breaks = seq(0, 100, 10)) +
  theme_minimal() +
  labs(x = "图片数量") +
  theme(
    text = element_text(family = "lxgw"),
    axis.title.y = element_blank(),
    legend.position = "none"
  )

# Pie chart of the same counts with a percentage label inside each slice
image_counts %>%
  ggplot(aes(x = "", y = n, fill = factor(number_of_images))) +
  geom_col() +
  geom_label(
    aes(label = labels),
    position = position_stack(vjust = 0.5),
    size = 2.3,
    family = "lxgw",
    show.legend = FALSE
  ) +
  coord_polar(theta = "y") +
  guides(fill = guide_legend(title = "图片数量")) +
  theme_void() +
  theme(text = element_text(family = "lxgw"))
# Number of likes ----
# One aggregated table (one row per `number_of_likes`) feeds both charts.
like_counts <- data %>%
  count(number_of_likes) %>%
  mutate(
    # The same factor drives the fill and the label ordering below, so the
    # label positions always follow the stacking order.
    likes_group = factor(number_of_likes),
    perc = n / sum(n),
    labels = scales::percent(perc)
  ) %>%
  # The stack is built from the last level upwards; accumulate in that order
  # and put each label at the midpoint of its segment.
  arrange(desc(likes_group)) %>%
  mutate(text_y = cumsum(n) - n / 2)

# Bar chart: categories ordered from most to least frequent
ggplot(
  data = like_counts,
  aes(
    x = reorder(number_of_likes, -n),
    y = n,
    fill = likes_group
  )
) +
  geom_col() +
  # NOTE(review): ticks are hard-coded to 0..170 in steps of 10 — confirm
  # against the data.
  scale_y_continuous(breaks = seq(0, 170, 10)) +
  theme_minimal() +
  labs(x = "点赞数量") +
  theme(
    text = element_text(family = "lxgw"),
    axis.title.y = element_blank(),
    legend.position = "none"
  )

# Pie chart of the same counts with repelled percentage labels
like_counts %>%
  ggplot(aes(x = "", y = n, fill = likes_group)) +
  geom_col() +
  geom_label_repel(
    aes(label = labels, y = text_y),
    force = 0.5,
    nudge_x = 1,
    nudge_y = 0.5,
    size = 3,
    show.legend = FALSE,
    family = "lxgw"
  ) +
  coord_polar(theta = "y") +
  guides(fill = guide_legend(title = "点赞数量")) +
  theme_void() +
  theme(text = element_text(family = "lxgw"))
# Number of comments ----
# One aggregated table (one row per `number_of_comments`) feeds both charts.
comment_counts <- data %>%
  count(number_of_comments) %>%
  mutate(
    # The same factor drives the fill and the label ordering below, so the
    # label positions always follow the stacking order.
    comments_group = factor(number_of_comments),
    perc = n / sum(n),
    labels = scales::percent(perc)
  ) %>%
  # The stack is built from the last level upwards; accumulate in that order
  # and put each label at the midpoint of its segment.
  arrange(desc(comments_group)) %>%
  mutate(text_y = cumsum(n) - n / 2)

# Bar chart: categories ordered from most to least frequent
ggplot(
  data = comment_counts,
  aes(
    x = reorder(number_of_comments, -n),
    y = n,
    fill = comments_group
  )
) +
  geom_col() +
  # NOTE(review): ticks are hard-coded to 0..220 in steps of 10 — confirm
  # against the data.
  scale_y_continuous(breaks = seq(0, 220, 10)) +
  theme_minimal() +
  labs(x = "评论数量") +
  theme(
    text = element_text(family = "lxgw"),
    axis.title.y = element_blank(),
    legend.position = "none"
  )

# Pie chart of the same counts with repelled percentage labels
comment_counts %>%
  ggplot(aes(x = "", y = n, fill = comments_group)) +
  geom_col() +
  geom_label_repel(
    aes(label = labels, y = text_y),
    force = 0.5,
    nudge_x = 1,
    nudge_y = 0.5,
    size = 3,
    show.legend = FALSE,
    family = "lxgw"
  ) +
  coord_polar(theta = "y") +
  guides(fill = guide_legend(title = "评论数量")) +
  theme_void() +
  theme(text = element_text(family = "lxgw"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Keep in mind that you must create a file named `moments.csv` containing the formatted data before running the code.
The `moments.csv` file looks like this:
Then you can execute the code by running the command below:
A 17-page PDF file named
Rplots.pdf
will be generated in the current directory.