Skip to content

Instantly share code, notes, and snippets.

@basilesimon
Created February 14, 2023 11:51
Show Gist options
  • Save basilesimon/6658d8fbd7ace4a54490704e757886b8 to your computer and use it in GitHub Desktop.
Save basilesimon/6658d8fbd7ace4a54490704e757886b8 to your computer and use it in GitHub Desktop.
DFRLab data wrangle
library(tidyverse)
library(readxl)
library(jsonlite)
# Convert the "RNR pre-invasion events" sheet of DFRLab_RNR-data.xlsx into
# dfrlab_export.csv: fixed asset/author metadata plus five
# `extras:key_N` / `extras:value_N` pairs (publication datetime, engagement,
# event, authors, narrative).

# Date/time component columns that get left-padded with "0" to width 2.
padded_parts <- c(
  "dateTime_month",
  "dateTime_day",
  "dateTime_hour",
  "dateTime_minute",
  "dateTime_second"
)

# Columns written to the CSV, in output order.
export_columns <- c(
  "path",
  "organization_id",
  "collection_id",
  "asset_id:key",
  "asset_id:value",
  "name",
  "description",
  "author:type",
  "author:name",
  "author:identifier",
  "extras:key_1",
  "extras:value_1",
  "extras:key_2",
  "extras:value_2",
  "extras:key_3",
  "extras:value_3",
  "extras:key_4",
  "extras:value_4",
  "extras:key_5",
  "extras:value_5"
)

events <- read_excel(
  "DFRLab_RNR-data.xlsx",
  sheet = "RNR pre-invasion events"
)

# Column-wise work: everything here is either a constant, a straight copy of
# an input column, or a vectorised call, so no row grouping is needed.
prepared <- events %>%
  mutate(
    # Asset identification and authorship.
    path = target_url,
    organization_id = "dfrlab",
    collection_id = "pre-invasion-russian-narratives",
    `asset_id:key` = "archive_no",
    `asset_id:value` = id,
    name = title,
    description = "",
    `author:type` = "Organization",
    `author:name` = "Digital Forensic Research Lab",
    `author:identifier` = "https://atlanticcouncil.org",
    # Labels for the five extras; values 1, 2 and 5 are built per row below.
    `extras:key_1` = "publication_datetime",
    `extras:key_2` = "engagement",
    `extras:key_3` = "Event",
    `extras:value_3` = Event,
    `extras:key_4` = "Authors",
    `extras:value_4` = authors,
    `extras:key_5` = "narrative"
  ) %>%
  mutate(
    across(all_of(padded_parts), ~ str_pad(.x, 2, pad = "0"))
  ) %>%
  mutate(
    # Each trio takes the first non-missing value across the six
    # per-narrative columns of the sheet.
    general_narrative = coalesce(
      `General narrative \"Ukraine is aggressive\"`,
      `General narrative \"The West is creating tensions in the region\"`,
      `General narrative \"Ukraine is a puppet of the West\"`,
      `General narrative \"Russia has a moral obligation to protect the region's security\"`,
      `General narrative \"Russia is seeking peace\"`,
      `General narrative \"Other\"`
    ),
    narrative_source = coalesce(
      `Source of the narrative \"Ukraine is aggressive\"`,
      `Source of the narrative \"The West is creating tensions in the region\"`,
      `Source of the narrative \"Ukraine is a puppet of the West\"`,
      `Source of the narrative \"Russia has a moral obligation to protect the region's security\"`,
      `Source of the narrative \"Russia is seeking peace\"`,
      `Source of the narrative \"Other\"`
    ),
    narrative = coalesce(
      `Ukraine is aggressive`,
      `The West is creating tensions in the region`,
      `Ukraine is a puppet of the West`,
      `Russia has a moral obligation to protect the region's security`,
      `Russia is seeking peace`,
      `Other`
    )
  )

# Row-wise work: str_interp() and toJSON() are called once per record, so
# each row gets its own timestamp string and its own JSON documents.
with_json <- prepared %>%
  rowwise() %>%
  mutate(
    `extras:value_1` = str_interp(
      "${dateTime_year}-${dateTime_month}-${dateTime_day}T${dateTime_hour}:${dateTime_minute}:${dateTime_second}Z"
    ),
    `extras:value_2` = toJSON(
      tibble(
        facebook_total,
        twitter_total,
        telegram_engagement,
        # The input column is referenced as `overall_engagemet`; it is
        # emitted under the name `overall_engagement`.
        overall_engagement = overall_engagemet
      )
    ),
    `extras:value_5` = toJSON(
      tibble(narrative, narrative_source, general_narrative)
    )
  ) %>%
  ungroup()

with_json %>%
  select(all_of(export_columns)) %>%
  write_csv("dfrlab_export.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment