benmarwick · August 4, 2018 17:01 · benmarwick · May 19, 2018 · benmarwick · Jul 21, 2018
diff --git a/cluster-images-by-hand-drawn-outlines b/cluster-images-by-hand-drawn-outlines
 # to make traces...
 # Import an EPS file in Inkscape
 # Trace over it to show half of vessel profile, from top of rim to centre of base, about 20-30 segements
 # detelete EPS from from Inkscape
 # export PNG of trace from Inkscape



 library(magick)
 lf <- list.files("../traces", full.names = TRUE)

 for (i in seq_along(jpgs)) {
  image_write(image_convert(image_read(lf[[i]]), "jpg"), paste0(lf[[i]], "_output.jpg"))
 }

 jpgs<- list.files("../traces", pattern = ".jpg", full.names = TRUE)
 library(Momocs)
 jpgs_imported <- import_jpg(jpgs, threshold = 0.1)

 jpgs_imported_coo <- 
 jpgs_imported[-2] %>% 
  Opn() %>% 
  Out() %>% 
  coo_close() %>% 
  coo_center %>% 
  coo_scale %>% 
  coo_align() %>% 
  coo_rotate(theta =  pi/2) %>% 
  coo_slidedirection("up") 

 jpgs_imported_coo %>% 
   panel()

 #  efa
 op <- efourier(jpgs_imported_coo, norm= FALSE)
 op.p <- PCA(op)
 plot(op.p, 
     cex = 3, 
     labelspoints = TRUE,
     cex.labelspoints = 1)
 PCcontrib(op.p)




diff --git a/pca-pottery-outlines.R b/pca-pottery-outlines.R

 # get SVG files from google drive
 library(googledrive)
 ID <- "https://drive.google.com/drive/u/1/folders/1wNLi_v40KX-U6s42eHrwueiearLvZ5Jv"
 g_drive <- drive_ls( path = "Pot_outline_trace", recursive = TRUE, pattern = ".svg$")

 # download SVGs from google
 library(tidyverse)
 map2(g_drive$id, 
     g_drive$name,
     ~as_id(.x) %>% drive_download(path = str_glue('Pottery_outlines/outlines_svg/{.y}')))

 lf <- list.files("Pottery_outlines/outlines_svg", 
                 full.names = TRUE)


 library(xml2)
 remove_scan_and_fill <- function(svg_file) {
  # remove the scan image from the SVG
  t1 <- read_xml(svg_file)
  t2 <- xml_children(t1)
  scan_layer <- t2[[4]]
  xml_remove(scan_layer)
  # now we have a SVG without the scan layer
  
  # change from outline to fill
  stroke_and_fill_node <- xml_children(t1)[4]
  stroke_and_fill_node_inner <- xml_children(stroke_and_fill_node)
  xml_attr(stroke_and_fill_node_inner, "style") <- 
    "fill:#000000;stroke:none;stroke-width:5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"

  write_xml(t1,
            str_glue("Pottery_outlines/outlines_svg/fill_{basename(svg_file)}"))
  
  # fit image object to canvas in svg so it's all captured during the conversion to JPG
  system(str_glue('inkscape --verb=EditSelectAll --verb=AlignHorizontalCenter --verb=AlignVerticalCenter  --verb=FitCanvasToSelectionOrDrawing --verb=FileSave --verb=FileQuit "$(pwd)/Pottery_outlines/outlines_svg/fill_{basename(svg_file)}"'))
            
 }

 # run the function on all SVGs
 walk(lf, remove_scan_and_fill)

 # convert SVG to PNG
 filled_svgs <- list.files("Pottery_outlines/outlines_svg", pattern = "^fill_", full.names = TRUE)

 # convert svgs to to jpg
 library(magick)
 map(filled_svgs,
  ~image_read_svg(.x) %>% 
  image_background("white") %>%  
  image_convert("jpg") %>% 
  image_write(str_glue("{.x}_output.jpg")))

 # from https://gist.github.com/benmarwick/3d160da203e4abaacd670d895f1ca72a

 #--------------------------------------------------
 # Once we have the jpgs, start from here
 library(tidyverse)

 # get list of jpgs only
 jpgs <- list.files("Pottery_outlines/outlines_svg", pattern = ".jpg", full.names = TRUE)

 # join file names to phase
 pottery_data <- readr::read_csv("Pottery_outlines/KWL_Ceramic_final2018_with_phases.csv")
 pottery_data <- pottery_data %>% 
  mutate(filename = str_glue('fill_{filename}.svg_output.jpg'))

 # some outlines that we have no phases for... 
 sdiff <- setdiff(pottery_data$filename, basename(jpgs))

 # the ones we do have both outlines and phases
 isect <- intersect(pottery_data$filename, basename(jpgs))

 jpgs <- jpgs[basename(jpgs) %in% isect]
 pottery_data <- 
  pottery_data %>% 
  filter(filename %in% isect) %>% 
  distinct(filename, phase, .keep_all = TRUE)

 phases_isect <- 
  pottery_data %>%  
  select(filename, phase) %>% 
  filter(filename %in% isect) %>% 
  mutate_all(as.factor) %>% 
  distinct(filename, phase) %>% 
  mutate(phase = as.factor(case_when(
    phase == 'post-e' ~ "Post-European",
    phase == 'pre-e' ~ "Pre-European",
    phase == 'ch-con' ~ "Chinese Contact",
  )))

 # how many pots in each phase?
 phases_isect %>% 
  group_by(phase) %>% 
  tally(sort = T)

 # start shape exploration
 library(Momocs)
 #  it freezes on the empty ones...
 jpgs_imported <- import_jpg(jpgs, threshold = 0.5)

 # Normalise the shapes, from 
 # https://www.repository.cam.ac.uk/bitstream/handle/1810/266423/Supplementary%20Code%20S1.pdf?sequence=20&isAllowed=y
 jpgs_imported_coo <- 
  jpgs_imported %>% 
   Out(., fac = phases_isect)  %>% 
  # Centring, scaling and sampling pseudo-landmarks
   coo_center %>% 
   coo_scale %>% 
   coo_sample(., 1000) %>% 
  # Procrustes superimposition
   fgProcrustes %>% 
  # Normalisation of the starting points
   coo_slidedirection

 jpgs_imported_coo %>% 
  panel(., fac = phases_isect$phase)

 pile(jpgs_imported_coo)

 # check how to do outlines: https://vbonhomme.github.io/Momocs/vignettes/Momocs_speed_dating.html

 #-----------------------------------------------------------
 #  efa

 # this is quite slow...
 op <- efourier(jpgs_imported_coo, norm = FALSE)

 # The progressive capture of shape geometry along the number of harmonics can be visualized with:
  
 calibrate_reconstructions_efourier(jpgs_imported_coo,  
                          id = 1, 
                          range = c(1, 2, 4, 8, 14, 20))

 # mean shape, per group
 bot.ms <- mshapes(op, 2)
 `Chinese Contact`   <- bot.ms$shp$`Chinese Contact` %T>% coo_plot(border="blue")
 `Post-European`  <- bot.ms$shp$`Post-European`  %T>% coo_draw(border="red")
 `Pre-European` <-   bot.ms$shp$`Pre-European`  %T>% coo_draw(border="green")
 legend("topright", 
       lwd=1,
       col=c("blue", "red", "green"), 
       legend=c("Chinese contact", "Post-European", "Pre-European"))

 # Thin plate spline analysis (TPS)
 # Thin plate splines (TPS) was used to compare average shapes and visualise the outline deformations required to pass from an extreme of the morphospace to the other.
 # Deformation grids
 png("Pottery_outlines/tps-pre-Euro-to-post-Euro.png", res = 300, w = 900, h = 900)
 tps_grid(`Pre-European`, `Post-European` , shp.lwd = c(2, 2), shp.border = c("red", "blue"),
         amp = 1, grid.size = 18, legend = T)
 dev.off()

 png("Pottery_outlines/tps-pre-Euro-to-Chinese.png", res = 300, w = 900, h = 900)
 tps_grid(`Pre-European`, `Chinese Contact`, shp.lwd = c(2, 2), shp.border = c("red", "blue"),
         amp = 1, grid.size = 18, legend = T)
 dev.off()

 # Isodeformation lines
 tps_iso(`Pre-European`, `Post-European`,  shp.lwd = c(2, 2), shp.border = c("red", "blue"),
        amp = 0.5, iso.nb = 10000, iso.levels = 12, legend.text = c("Pre-contact", "Post-contact"),
        palette = col_summer, grid = F)


 #-------------------------------------------------------------
 # pca

 # these are fast
 op.p <- PCA(op)

 # We can see the pots groups by phase, and their shape distribution
 # according to the first two principle components of the EFA 
 # PC1 and PC2 explain 79% of variation

 png("Pottery_outlines/pca-biplot.png", res = 300, w = 1500, h = 1500)
 plot(op.p, 2, 
     morpho = TRUE, 
     ellipses = TRUE,
     cex = 1, 
     cex.labelsgroups = 1.5)

 dev.off()


 plot(PCA(op), 2, pos.shp="full")
 plot(PCA(op), 2, pos.shp="range")
 plot(PCA(op), 2, pos.shp="xy")
 plot(PCA(op), 2, pos.shp="range_axes")
 plot(PCA(op), 2, pos.shp="full_axes")


 # the contributions of each PC to the shapes
 pc_contrib <- PCcontrib(op.p, nax = 1:3) 
 pc_contrib$gg +  
  geom_polygon(fill="slategrey", col="black") + 
  ggtitle("Shape variation along PC axes") +
  theme_minimal(base_size = 6)
  
 ggsave("Pottery_outlines/pca-contrib.png")
 scree_plot(op.p)

 # PC1 is height of vessel
 # PC2 is neck constriction
 # PC3 is height of neck and roundness of body

 #-----------------------------------------------------------
 # lda

 # we performed a linear discriminant analysis (LDA) based on the new shape variables (PCs), with a leave-one-out cross-validation procedure, to identify the linear combination of shape features that was able to discriminate between phases

 # we work on PCA scores
 bot.l <- LDA(op.p, 2)
 # print a summary, along with the leave-one-out cross-validation table. 
 bot.l 
 # plot.LDA works pretty much with the same grammar as plot.PCA
 # here we only have one LD
 plot(bot.l)
 # plot the cross-validation table
 plot_CV(bot.l) 

 # Does not produce an efficient separation between groups and a cross-validation (leave-one-out) at phase level showing a low confidence in the reclassification. But best for Pre-e, indicating these have the most distinctive shapes

 #--------------------------------------------------
 # Unsupervised model-based clustering

 # what are the best grouping of the pots by shape?
 library(mclust)

 BIC <- mclustBIC(op.p$x[ , 1:5])
 mod1 <- Mclust(op.p$x[ , 1:5], x = BIC)
 summary(mod1, parameters = TRUE)
 plot(mod1, what = "classification")

 # no clusters apparent 

 #----------------------------------------------------
 # MANOVA: Multivariate Analysis of (co)variace
 # We can test for a difference in the distribution of PC scores by calculating a pairwise combination between every levels of a fac. 

 manv <-   MANOVA_PW(op.p, "phase")

 manv$summary

 # shows sig diff in shape for all but ch-con vs. post-e

 # $summary
 # Df    Pillai approx F num Df den Df
 # ch-con - post-e  1 0.3900054 1.685582     11     29
 # ch-con - pre-e   1 0.7083273 7.506274     11     34
 # post-e - pre-e   1 0.3677487 2.485230     11     47
 # Pr(>F)
 # ch-con - post-e 1.270691e-01
 # ch-con - pre-e  2.411143e-06
 # post-e - pre-e  1.506824e-02


 #----------------------------------------------------
 # GAMMS from https://www-nature-com.offcampus.lib.washington.edu/articles/s41598-018-20122-9

 # Generalised additive mixed models (GAMMs) were used to explain shape variance with respect to mean environmental parameters and shell size, and to compare between individual shape features (PCs). A GAMM is a generalised linear mixed model with a linear predictor involving a sum of smooth functions of covariates. The model allowed for flexible specification of the dependence of the response on the covariates by defining regression splines (smoothing functions) and estimating their optimal degree of smoothness, rather than calculating parametric relationships

 ggplot(pottery_data,
       aes(phase,
           as.numeric(Thick1_Neck))) +
  geom_boxplot()

 pottery_data$thick_neck1 <- as.numeric(pottery_data$Thick1_Neck)
 pottery_data$thick_body1 <- as.numeric(pottery_data$Thick1_Body)
 pottery_data$height <- as.numeric(pottery_data$Hight)

 plot(op.p$x[ , 2], pottery_data$thick_body1)

 gams_input <- data.frame(thick_body1 = pottery_data$thick_body1 , 
                         height = pottery_data$height, 
                         shape_pc1 = op.p$x[ , 1])

 library(brms)
 m2 <- brm(bf(shape_pc1 ~ s(thick_body1)),
          data = gams_input, family = gaussian(), cores = 4, seed = 17,
          iter = 4000, warmup = 1000, thin = 10, refresh = 0,
          control = list(adapt_delta = 0.99))
 summary(m2) # so many zeros... not very promising 
 # plot marginal effects for each predictor
 plot(marginal_effects(m2), ask = FALSE)
 loo(m2)
 pp_check(m2)

 #----------------------------------------------------
 # Spatial


 #----------------------------------------------------
 # CVs on shape PCs for standardisation

 # plot PC1-3 distribution by phase
 pc_plot <- data_frame(pc = c(op.p$x[,1],
                             op.p$x[,2],
                             op.p$x[,3]),
                      phase = rep(op.p$fac$phase, 3),
                        pcn = str_glue('PC{unlist(map(1:3, ~rep(.x, length(op.p$x[,1]))))}'))


 ggplot(pc_plot,
       aes(phase,
           pc)) +
  geom_violin() +
  ggforce::geom_sina() +
  facet_wrap(~ pcn, ncol = 1) +
  theme_minimal()
 ggsave("Pottery_outlines/pcs-boxplot-for-shape-variation.png",
       h = 10, w = 5)


 library(cvequality)
 pc1 <- data.frame(
 x  = op.p$x[,1],
 y = as.character(op.p$fac$phase),
 stringsAsFactors = F)

 cv_test_pc1 <- mslr_test(nr = 1000, pc1$x, pc1$y)

 pc2 <- data.frame(
  x  = op.p$x[,2],
  y = as.character(op.p$fac$phase),
  stringsAsFactors = F)

 cv_test_pc2 <- mslr_test(nr = 1000, pc2$x, pc2$y)

 pc3 <- data.frame(
  x  = op.p$x[,3],
  y = as.character(op.p$fac$phase),
  stringsAsFactors = F)

 cv_test_pc3 <- mslr_test(nr = 1000, pc3$x, pc3$y)

 # do this pairwise
 cv_test_pc1.1 <- 
  mslr_test(nr = 1000, 
            pc1$x[pc1$y %in% c("Post-European", 
                               "Pre-European")], 
            pc1$y[pc1$y %in% c("Post-European", 
                               "Pre-European")])

 cv_test_pc1.2 <- 
  mslr_test(nr = 1000, 
            pc1$x[pc1$y %in% c("Chinese Contact", 
                               "Pre-European")], 
            pc1$y[pc1$y %in% c("Chinese Contact", 
                               "Pre-European")])

 cv_test_pc1.3 <- 
  mslr_test(nr = 1000, 
            pc1$x[pc1$y %in% c("Chinese Contact", 
                               "Post-European")], 
            pc1$y[pc1$y %in% c("Chinese Contact", 
                               "Post-European")])

 PC1_cv_tests <- 
 bind_rows(cv_test_pc1.1, cv_test_pc1.2, cv_test_pc1.3) %>% 
  bind_cols(phases = c("Post-European vs Pre-European",
              "Chinese Contact vs Pre-European",
              "Chinese Contact vs Post-European"))

 cv_test_pc2.1 <- 
  mslr_test(nr = 1000, 
            pc2$x[pc2$y %in% c("Post-European", 
                               "Pre-European")], 
            pc2$y[pc2$y %in% c("Post-European", 
                               "Pre-European")])

 cv_test_pc2.2 <- 
  mslr_test(nr = 1000, 
            pc2$x[pc2$y %in% c("Chinese Contact", 
                               "Pre-European")], 
            pc2$y[pc2$y %in% c("Chinese Contact", 
                               "Pre-European")])

 cv_test_pc2.3 <- 
  mslr_test(nr = 1000, 
            pc2$x[pc2$y %in% c("Chinese Contact", 
                               "Post-European")], 
            pc2$y[pc2$y %in% c("Chinese Contact", 
                               "Post-European")])

 PC2_cv_tests <- 
  bind_rows(cv_test_pc2.1, 
            cv_test_pc2.2, 
            cv_test_pc2.3) %>% 
  bind_cols(phases = c("Post-European vs Pre-European",
                       "Chinese Contact vs Pre-European",
                       "Chinese Contact vs Post-European"))

 bind_rows(PC1_cv_tests, 
          PC2_cv_tests, .id = "PC") %>% 
  mutate(PC = str_glue('PC{PC}')) %>% 
  mutate(colour = if_else(p_value < 0.05, "red", "black")) %>% 
  ggplot(aes(str_wrap(phases, 40),
             p_value,
             colour = I(colour))) +
  geom_point(size = 8) +
  facet_wrap(~ PC, ncol = 1) +
  theme_minimal(base_size = 12) +
  coord_cartesian(clip = 'off') +
  xlab("") +
  ylab("CV equality test p-values")

 ggsave("Pottery_outlines/cv-test-on-pcs.png", h = 3, w = 12)
	# to make traces...
	# Import an EPS file in Inkscape
	# Trace over it to show half of vessel profile, from top of rim to centre of base, about 20-30 segements
	# detelete EPS from from Inkscape
	# export PNG of trace from Inkscape



	library(magick)
	lf <- list.files("../traces", full.names = TRUE)

	for (i in seq_along(jpgs)) {
	image_write(image_convert(image_read(lf[[i]]), "jpg"), paste0(lf[[i]], "_output.jpg"))
	}

	jpgs<- list.files("../traces", pattern = ".jpg", full.names = TRUE)
	library(Momocs)
	jpgs_imported <- import_jpg(jpgs, threshold = 0.1)

	jpgs_imported_coo <-
	jpgs_imported[-2] %>%
	Opn() %>%
	Out() %>%
	coo_close() %>%
	coo_center %>%
	coo_scale %>%
	coo_align() %>%
	coo_rotate(theta = pi/2) %>%
	coo_slidedirection("up")

	jpgs_imported_coo %>%
	panel()

	# efa
	op <- efourier(jpgs_imported_coo, norm= FALSE)
	op.p <- PCA(op)
	plot(op.p,
	cex = 3,
	labelspoints = TRUE,
	cex.labelspoints = 1)
	PCcontrib(op.p)

	# get SVG files from google drive
	library(googledrive)
	ID <- "https://drive.google.com/drive/u/1/folders/1wNLi_v40KX-U6s42eHrwueiearLvZ5Jv"
	g_drive <- drive_ls( path = "Pot_outline_trace", recursive = TRUE, pattern = ".svg$")

	# download SVGs from google
	library(tidyverse)
	map2(g_drive$id,
	g_drive$name,
	~as_id(.x) %>% drive_download(path = str_glue('Pottery_outlines/outlines_svg/{.y}')))

	lf <- list.files("Pottery_outlines/outlines_svg",
	full.names = TRUE)


	library(xml2)
	remove_scan_and_fill <- function(svg_file) {
	# remove the scan image from the SVG
	t1 <- read_xml(svg_file)
	t2 <- xml_children(t1)
	scan_layer <- t2[[4]]
	xml_remove(scan_layer)
	# now we have a SVG without the scan layer

	# change from outline to fill
	stroke_and_fill_node <- xml_children(t1)[4]
	stroke_and_fill_node_inner <- xml_children(stroke_and_fill_node)
	xml_attr(stroke_and_fill_node_inner, "style") <-
	"fill:#000000;stroke:none;stroke-width:5;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"

	write_xml(t1,
	str_glue("Pottery_outlines/outlines_svg/fill_{basename(svg_file)}"))

	# fit image object to canvas in svg so it's all captured during the conversion to JPG
	system(str_glue('inkscape --verb=EditSelectAll --verb=AlignHorizontalCenter --verb=AlignVerticalCenter --verb=FitCanvasToSelectionOrDrawing --verb=FileSave --verb=FileQuit "$(pwd)/Pottery_outlines/outlines_svg/fill_{basename(svg_file)}"'))

	}

	# run the function on all SVGs
	walk(lf, remove_scan_and_fill)

	# convert SVG to PNG
	filled_svgs <- list.files("Pottery_outlines/outlines_svg", pattern = "^fill_", full.names = TRUE)

	# convert svgs to to jpg
	library(magick)
	map(filled_svgs,
	~image_read_svg(.x) %>%
	image_background("white") %>%
	image_convert("jpg") %>%
	image_write(str_glue("{.x}_output.jpg")))

	# from https://gist.github.com/benmarwick/3d160da203e4abaacd670d895f1ca72a

	#--------------------------------------------------
	# Once we have the jpgs, start from here
	library(tidyverse)

	# get list of jpgs only
	jpgs <- list.files("Pottery_outlines/outlines_svg", pattern = ".jpg", full.names = TRUE)

	# join file names to phase
	pottery_data <- readr::read_csv("Pottery_outlines/KWL_Ceramic_final2018_with_phases.csv")
	pottery_data <- pottery_data %>%
	mutate(filename = str_glue('fill_{filename}.svg_output.jpg'))

	# some outlines that we have no phases for...
	sdiff <- setdiff(pottery_data$filename, basename(jpgs))

	# the ones we do have both outlines and phases
	isect <- intersect(pottery_data$filename, basename(jpgs))

	jpgs <- jpgs[basename(jpgs) %in% isect]
	pottery_data <-
	pottery_data %>%
	filter(filename %in% isect) %>%
	distinct(filename, phase, .keep_all = TRUE)

	phases_isect <-
	pottery_data %>%
	select(filename, phase) %>%
	filter(filename %in% isect) %>%
	mutate_all(as.factor) %>%
	distinct(filename, phase) %>%
	mutate(phase = as.factor(case_when(
	phase == 'post-e' ~ "Post-European",
	phase == 'pre-e' ~ "Pre-European",
	phase == 'ch-con' ~ "Chinese Contact",
	)))

	# how many pots in each phase?
	phases_isect %>%
	group_by(phase) %>%
	tally(sort = T)

	# start shape exploration
	library(Momocs)
	# it freezes on the empty ones...
	jpgs_imported <- import_jpg(jpgs, threshold = 0.5)

	# Normalise the shapes, from
	# https://www.repository.cam.ac.uk/bitstream/handle/1810/266423/Supplementary%20Code%20S1.pdf?sequence=20&isAllowed=y
	jpgs_imported_coo <-
	jpgs_imported %>%
	Out(., fac = phases_isect) %>%
	# Centring, scaling and sampling pseudo-landmarks
	coo_center %>%
	coo_scale %>%
	coo_sample(., 1000) %>%
	# Procrustes superimposition
	fgProcrustes %>%
	# Normalisation of the starting points
	coo_slidedirection

	jpgs_imported_coo %>%
	panel(., fac = phases_isect$phase)

	pile(jpgs_imported_coo)

	# check how to do outlines: https://vbonhomme.github.io/Momocs/vignettes/Momocs_speed_dating.html

	#-----------------------------------------------------------
	# efa

	# this is quite slow...
	op <- efourier(jpgs_imported_coo, norm = FALSE)

	# The progressive capture of shape geometry along the number of harmonics can be visualized with:

	calibrate_reconstructions_efourier(jpgs_imported_coo,
	id = 1,
	range = c(1, 2, 4, 8, 14, 20))

	# mean shape, per group
	bot.ms <- mshapes(op, 2)
	`Chinese Contact` <- bot.ms$shp$`Chinese Contact` %T>% coo_plot(border="blue")
	`Post-European` <- bot.ms$shp$`Post-European` %T>% coo_draw(border="red")
	`Pre-European` <- bot.ms$shp$`Pre-European` %T>% coo_draw(border="green")
	legend("topright",
	lwd=1,
	col=c("blue", "red", "green"),
	legend=c("Chinese contact", "Post-European", "Pre-European"))

	# Thin plate spline analysis (TPS)
	# Thin plate splines (TPS) was used to compare average shapes and visualise the outline deformations required to pass from an extreme of the morphospace to the other.
	# Deformation grids
	png("Pottery_outlines/tps-pre-Euro-to-post-Euro.png", res = 300, w = 900, h = 900)
	tps_grid(`Pre-European`, `Post-European` , shp.lwd = c(2, 2), shp.border = c("red", "blue"),
	amp = 1, grid.size = 18, legend = T)
	dev.off()

	png("Pottery_outlines/tps-pre-Euro-to-Chinese.png", res = 300, w = 900, h = 900)
	tps_grid(`Pre-European`, `Chinese Contact`, shp.lwd = c(2, 2), shp.border = c("red", "blue"),
	amp = 1, grid.size = 18, legend = T)
	dev.off()

	# Isodeformation lines
	tps_iso(`Pre-European`, `Post-European`, shp.lwd = c(2, 2), shp.border = c("red", "blue"),
	amp = 0.5, iso.nb = 10000, iso.levels = 12, legend.text = c("Pre-contact", "Post-contact"),
	palette = col_summer, grid = F)


	#-------------------------------------------------------------
	# pca

	# these are fast
	op.p <- PCA(op)

	# We can see the pots groups by phase, and their shape distribution
	# according to the first two principle components of the EFA
	# PC1 and PC2 explain 79% of variation

	png("Pottery_outlines/pca-biplot.png", res = 300, w = 1500, h = 1500)
	plot(op.p, 2,
	morpho = TRUE,
	ellipses = TRUE,
	cex = 1,
	cex.labelsgroups = 1.5)

	dev.off()


	plot(PCA(op), 2, pos.shp="full")
	plot(PCA(op), 2, pos.shp="range")
	plot(PCA(op), 2, pos.shp="xy")
	plot(PCA(op), 2, pos.shp="range_axes")
	plot(PCA(op), 2, pos.shp="full_axes")


	# the contributions of each PC to the shapes
	pc_contrib <- PCcontrib(op.p, nax = 1:3)
	pc_contrib$gg +
	geom_polygon(fill="slategrey", col="black") +
	ggtitle("Shape variation along PC axes") +
	theme_minimal(base_size = 6)

	ggsave("Pottery_outlines/pca-contrib.png")
	scree_plot(op.p)

	# PC1 is height of vessel
	# PC2 is neck constriction
	# PC3 is height of neck and roundness of body

	#-----------------------------------------------------------
	# lda

	# we performed a linear discriminant analysis (LDA) based on the new shape variables (PCs), with a leave-one-out cross-validation procedure, to identify the linear combination of shape features that was able to discriminate between phases

	# we work on PCA scores
	bot.l <- LDA(op.p, 2)
	# print a summary, along with the leave-one-out cross-validation table.
	bot.l
	# plot.LDA works pretty much with the same grammar as plot.PCA
	# here we only have one LD
	plot(bot.l)
	# plot the cross-validation table
	plot_CV(bot.l)

	# Does not produce an efficient separation between groups and a cross-validation (leave-one-out) at phase level showing a low confidence in the reclassification. But best for Pre-e, indicating these have the most distinctive shapes

	#--------------------------------------------------
	# Unsupervised model-based clustering

	# what are the best grouping of the pots by shape?
	library(mclust)

	BIC <- mclustBIC(op.p$x[ , 1:5])
	mod1 <- Mclust(op.p$x[ , 1:5], x = BIC)
	summary(mod1, parameters = TRUE)
	plot(mod1, what = "classification")

	# no clusters apparent

	#----------------------------------------------------
	# MANOVA: Multivariate Analysis of (co)variace
	# We can test for a difference in the distribution of PC scores by calculating a pairwise combination between every levels of a fac.

	manv <- MANOVA_PW(op.p, "phase")

	manv$summary

	# shows sig diff in shape for all but ch-con vs. post-e

	# $summary
	# Df Pillai approx F num Df den Df
	# ch-con - post-e 1 0.3900054 1.685582 11 29
	# ch-con - pre-e 1 0.7083273 7.506274 11 34
	# post-e - pre-e 1 0.3677487 2.485230 11 47
	# Pr(>F)
	# ch-con - post-e 1.270691e-01
	# ch-con - pre-e 2.411143e-06
	# post-e - pre-e 1.506824e-02


	#----------------------------------------------------
	# GAMMS from https://www-nature-com.offcampus.lib.washington.edu/articles/s41598-018-20122-9

	# Generalised additive mixed models (GAMMs) were used to explain shape variance with respect to mean environmental parameters and shell size, and to compare between individual shape features (PCs). A GAMM is a generalised linear mixed model with a linear predictor involving a sum of smooth functions of covariates. The model allowed for flexible specification of the dependence of the response on the covariates by defining regression splines (smoothing functions) and estimating their optimal degree of smoothness, rather than calculating parametric relationships

	ggplot(pottery_data,
	aes(phase,
	as.numeric(Thick1_Neck))) +
	geom_boxplot()

	pottery_data$thick_neck1 <- as.numeric(pottery_data$Thick1_Neck)
	pottery_data$thick_body1 <- as.numeric(pottery_data$Thick1_Body)
	pottery_data$height <- as.numeric(pottery_data$Hight)

	plot(op.p$x[ , 2], pottery_data$thick_body1)

	gams_input <- data.frame(thick_body1 = pottery_data$thick_body1 ,
	height = pottery_data$height,
	shape_pc1 = op.p$x[ , 1])

	library(brms)
	m2 <- brm(bf(shape_pc1 ~ s(thick_body1)),
	data = gams_input, family = gaussian(), cores = 4, seed = 17,
	iter = 4000, warmup = 1000, thin = 10, refresh = 0,
	control = list(adapt_delta = 0.99))
	summary(m2) # so many zeros... not very promising
	# plot marginal effects for each predictor
	plot(marginal_effects(m2), ask = FALSE)
	loo(m2)
	pp_check(m2)

	#----------------------------------------------------
	# Spatial


	#----------------------------------------------------
	# CVs on shape PCs for standardisation

	# plot PC1-3 distribution by phase
	pc_plot <- data_frame(pc = c(op.p$x[,1],
	op.p$x[,2],
	op.p$x[,3]),
	phase = rep(op.p$fac$phase, 3),
	pcn = str_glue('PC{unlist(map(1:3, ~rep(.x, length(op.p$x[,1]))))}'))


	ggplot(pc_plot,
	aes(phase,
	pc)) +
	geom_violin() +
	ggforce::geom_sina() +
	facet_wrap(~ pcn, ncol = 1) +
	theme_minimal()
	ggsave("Pottery_outlines/pcs-boxplot-for-shape-variation.png",
	h = 10, w = 5)


	library(cvequality)
	pc1 <- data.frame(
	x = op.p$x[,1],
	y = as.character(op.p$fac$phase),
	stringsAsFactors = F)

	cv_test_pc1 <- mslr_test(nr = 1000, pc1$x, pc1$y)

	pc2 <- data.frame(
	x = op.p$x[,2],
	y = as.character(op.p$fac$phase),
	stringsAsFactors = F)

	cv_test_pc2 <- mslr_test(nr = 1000, pc2$x, pc2$y)

	pc3 <- data.frame(
	x = op.p$x[,3],
	y = as.character(op.p$fac$phase),
	stringsAsFactors = F)

	cv_test_pc3 <- mslr_test(nr = 1000, pc3$x, pc3$y)

	# do this pairwise
	cv_test_pc1.1 <-
	mslr_test(nr = 1000,
	pc1$x[pc1$y %in% c("Post-European",
	"Pre-European")],
	pc1$y[pc1$y %in% c("Post-European",
	"Pre-European")])

	cv_test_pc1.2 <-
	mslr_test(nr = 1000,
	pc1$x[pc1$y %in% c("Chinese Contact",
	"Pre-European")],
	pc1$y[pc1$y %in% c("Chinese Contact",
	"Pre-European")])

	cv_test_pc1.3 <-
	mslr_test(nr = 1000,
	pc1$x[pc1$y %in% c("Chinese Contact",
	"Post-European")],
	pc1$y[pc1$y %in% c("Chinese Contact",
	"Post-European")])

	PC1_cv_tests <-
	bind_rows(cv_test_pc1.1, cv_test_pc1.2, cv_test_pc1.3) %>%
	bind_cols(phases = c("Post-European vs Pre-European",
	"Chinese Contact vs Pre-European",
	"Chinese Contact vs Post-European"))

	cv_test_pc2.1 <-
	mslr_test(nr = 1000,
	pc2$x[pc2$y %in% c("Post-European",
	"Pre-European")],
	pc2$y[pc2$y %in% c("Post-European",
	"Pre-European")])

	cv_test_pc2.2 <-
	mslr_test(nr = 1000,
	pc2$x[pc2$y %in% c("Chinese Contact",
	"Pre-European")],
	pc2$y[pc2$y %in% c("Chinese Contact",
	"Pre-European")])

	cv_test_pc2.3 <-
	mslr_test(nr = 1000,
	pc2$x[pc2$y %in% c("Chinese Contact",
	"Post-European")],
	pc2$y[pc2$y %in% c("Chinese Contact",
	"Post-European")])

	PC2_cv_tests <-
	bind_rows(cv_test_pc2.1,
	cv_test_pc2.2,
	cv_test_pc2.3) %>%
	bind_cols(phases = c("Post-European vs Pre-European",
	"Chinese Contact vs Pre-European",
	"Chinese Contact vs Post-European"))

	bind_rows(PC1_cv_tests,
	PC2_cv_tests, .id = "PC") %>%
	mutate(PC = str_glue('PC{PC}')) %>%
	mutate(colour = if_else(p_value < 0.05, "red", "black")) %>%
	ggplot(aes(str_wrap(phases, 40),
	p_value,
	colour = I(colour))) +
	geom_point(size = 8) +
	facet_wrap(~ PC, ncol = 1) +
	theme_minimal(base_size = 12) +
	coord_cartesian(clip = 'off') +
	xlab("") +
	ylab("CV equality test p-values")

	ggsave("Pottery_outlines/cv-test-on-pcs.png", h = 3, w = 12)