Last active
August 13, 2019 00:51
-
-
Save benmarwick/dc8a8abd17207f907123e65160dbccf8 to your computer and use it in GitHub Desktop.
Exploring the PCA published by Prentiss (1998) to understand the usefulness of the Sullivan and Rozen debitage typology
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#------------------------------------------------------ | |
# Exploring the PCA published by Prentiss (1998) to understand the | |
# usefulness of the Sullivan and Rozen debitage typology | |
# read in & tidy the data ----------------------------------------------- | |
library(tidyverse) | |
# got these data from table 7 (p. 644) of https://www.jstor.org/stable/2694112 | |
# OCR'd using https://tabula.technology/ | |
prentiss <- readr::read_csv("tabula-Prentiss 1988.csv") | |
# the abbreviations are, from the paper: | |
# CF = Complete Flake; PF = Proximal Fragment; MDF = Medial/distal;Fragment; | |
# NF = Nonorientable Fragment;SF = Split Flake; HH = Hard Hammer; SH = Soft Hammer; | |
# PR = Pressure; PC = Prepared Core; UPC = Unprepared Core; BF = Biface; FL = Flake Tool. | |
# transpose to long format for analysis and plotting | |
prentiss_long <- | |
prentiss %>% | |
gather(variable, value, -var) %>% | |
spread(var, value) %>% | |
as.data.frame() | |
# split out the labels to use for plotting later, | |
# and convert them to factors because that's what the plotting functions need | |
prentiss_long_labels <- | |
prentiss_long %>% | |
separate(variable, | |
sep = "-", | |
into = c("technological_origin", "reduction_activity", "number"), | |
remove = FALSE) %>% | |
mutate_at(vars(technological_origin, | |
reduction_activity, | |
number, | |
variable), | |
as.factor) | |
# remove character cols for scatterplot panel | |
prentiss_long_rownames <- prentiss_long | |
row.names(prentiss_long_rownames) <- prentiss_long_rownames$variable | |
prentiss_long_rownames <- prentiss_long_rownames[,-1] | |
# scatterplot panel ----------------------------------------------- | |
# show scatterplot panel to get an overview of how the variables relate | |
# to each other | |
library(GGally) | |
ggpairs(prentiss_long_rownames) + | |
theme_bw() | |
# Compute & inspect the PCA ----------------------------------------------- | |
# put the sample names as row names so | |
# they show up later as labels on the plot | |
row.names(prentiss_long_labels) <- prentiss_long_labels$variable | |
library(FactoMineR) | |
# compute PCA | |
res.pca <- PCA(prentiss_long_labels, | |
quali.sup = 1:4, | |
graph = FALSE) | |
# inspect eigenvalues, values >1 indicate that component captures more | |
# variability that any of the original measurement variables | |
eigenvalues <- res.pca$eig | |
head(eigenvalues[, 1:2]) | |
# inspect distribution of PCs | |
library(factoextra) | |
fviz_screeplot(res.pca) | |
# Visualise output from the PCA ------------------------------------------ | |
# plot variable loadings | |
fviz_pca_var(res.pca, | |
col.var="contrib") + | |
scale_color_viridis_c() + | |
theme_bw() | |
# inspect PC1 vs PC2 | |
fviz_pca_ind(res.pca, | |
geom = "text") + | |
theme_bw() + | |
coord_equal() | |
# show ellipses for force application type | |
fviz_pca_biplot(res.pca, | |
geom = "text", | |
habillage = prentiss_long_labels$technological_origin, | |
addEllipses=TRUE) + | |
theme_bw() + | |
coord_equal() + | |
ggtitle("PCA showing artefacts grouped by technological origin") | |
# show ellipses for technology type | |
fviz_pca_biplot(res.pca, | |
geom = "text", | |
habillage = prentiss_long_labels$reduction_activity, | |
addEllipses=TRUE) + | |
theme_bw() + | |
coord_equal() + | |
ggtitle("PCA showing artefacts grouped by reduction activity") | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var | HH-PC-1 | SH-UPC-2 | HH-BF-3 | SH-BF-4 | HH-UPC-5 | SH-UPC-6 | HH-PC-7 | SH-PC-8 | HH-FL-9 | SH-FL-10 | PR-FL-11 | HH-FL-12 | SH-BF-13 | PR-BF-14 | HH-BF-15 | SH-UPC-16 | PR-UPC-17 | HH-UPC-18 | SH-PC-19 | PR-PC-20 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CF | 0.04 | 0.11 | 0.11 | 0.03 | 0.09 | 0.03 | 0.11 | 0.05 | 0.44 | 0.27 | 0.12 | 0.27 | 0.09 | 0.07 | 0.08 | 0.07 | 0.11 | 0.18 | 0.15 | 0.14 | |
PF | 0.08 | 0.17 | 0.12 | 0.14 | 0.12 | 0.1 | 0.17 | 0.13 | 0.03 | 0.16 | 0.28 | 0.16 | 0.16 | 0.17 | 0.11 | 0.08 | 0.41 | 0.16 | 0.16 | 0.26 | |
MDF | 0.71 | 0.57 | 0.63 | 0.75 | 0.64 | 0.66 | 0.63 | 0.65 | 0.39 | 0.51 | 0.36 | 0.51 | 0.66 | 0.59 | 0.63 | 0.64 | 0.37 | 0.51 | 0.63 | 0.35 | |
NF | 0.1 | 0.09 | 0.07 | 0.03 | 0.07 | 0.06 | 0.06 | 0.09 | 0 | 0.01 | 0 | 0.01 | 0 | 0 | 0.1 | 0.14 | 0.01 | 0.11 | 0 | 0.01 | |
SF | 0.07 | 0.06 | 0.07 | 0.05 | 0.08 | 0.15 | 0.03 | 0.08 | 0.14 | 0.05 | 0.24 | 0.05 | 0.09 | 0.17 | 0.08 | 0.09 | 0.1 | 0.04 | 0.06 | 0.24 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment