datagistips · September 16, 2012 21:04
diff --git a/city_names_wordcloud.R b/city_names_wordcloud.R
 library(rgdal)
 library(wordcloud)
 library(reshape)
 library(maptools)
 library(classInt)
 library(FactoMineR)
 library(FNN)

 ### LOAD DATA ###
 # Read the two OGR layers from the current directory (dsn = "."):
 # `f` holds the COMMUNE layer, `deps` the DEPARTEMENT layer.
 f <- readOGR(dsn = ".", layer = "COMMUNE")
 deps <- readOGR(dsn = ".", layer = "DEPARTEMENT")


 ##################################
 # DETECTION OF THE LAST SYLLABLE #
 ##################################

 # Building blocks of the pattern: a vowel class, and a class for "anything
 # that is neither a vowel nor a hyphen" (so a match never spans a hyphen).
 vyl <- "[aeiouyîïéè]"
 csn <- "[^aeiouyîïéè-]"
 # Two-letter consonant groups that are kept together at the syllable start.
 csnPlus <- "(tt|ss|mm|pp|nn|cc|rr|th|tr|gu|gn|gl|gr|gh|pl|ph|pr|br|bl|cr|cl|ch|vr|dr)"

 # Four alternatives, every one anchored at the end of the name ("$"):
 #   1. optional consonant, vowels, trailing consonants
 #   2. consonant group,    vowels, trailing consonants
 #   3. optional consonant, vowels, 1-2 consonants, final "e" + optional "s"
 #   4. consonant group,    vowels, 1-2 consonants, final "e" + optional "s"
 regles <- paste(
   paste0("(", csn, "?", vyl, "+", csn, "*", "$", ")"),
   paste0("(", csnPlus, vyl, "+", csn, "*", "$", ")"),
   paste0("(", csn, "?", vyl, "+", csn, "{1,2}", "(e)[s]*$", ")"),
   paste0("(", csnPlus, vyl, "+", csn, "{1,2}", "(e)[s]*$", ")"),
   sep = "|"
 )

 # Lower-cased commune names; regexpr() returns the start of the leftmost
 # match, i.e. the position where the last syllable begins.
 txt <- tolower(f$NOM_COMM)
 pos <- regexpr(regles, txt)

 # substr() is vectorised over its arguments, so no index loop is needed and
 # an empty `txt` yields an empty result (1:length(txt) would iterate over
 # c(1, 0)). A name without any match has pos == -1; substr() treats a start
 # below 1 as 1, so the whole name is kept in that case.
 sylls <- substr(txt, pos, nchar(txt))


 ##############################
 # SYLLABLE COUNTS PER REGION #
 ##############################

 # One row per commune; the constant `value` of 1 turns the sum into a count.
 df <- data.frame(region = f$NOM_REGION, lastsyllabus = sylls, value = 1)

 # Cross-tabulation: one row per region, one column per last syllable.
 r <- cast(df, region ~ lastsyllabus, fun.aggregate = sum)

 # Move the region names out of the table and into the row names.
 rownames(r) <- r$region
 r$region <- NULL


 #################################
 # CORRESPONDENCE ANALYSIS (AFC) #
 #################################

 # Correspondence analysis of the region x syllable table, two dimensions kept.
 afc <- CA(r, ncp = 2)

 # Coordinates of the rows (regions) and columns (syllables) on axes 1 and 2.
 coordsReg <- afc$row$coord[, 1:2]
 coordsSyl <- afc$col$coord[, 1:2]

 # For every region, look up its n nearest syllables in that plane
 # (get.knnx() returns their indices and distances).
 n <- 50
 nn <- get.knnx(data = coordsSyl, query = coordsReg, k = n)


 ################
 # REGION LAYER #
 ################

 # Merge the department polygons by region name, reset the feature IDs to the
 # names of the merged object, then attach a one-column attribute table
 # (nom_reg) whose row names are those same names.
 reg <- local({
   merged <- unionSpatialPolygons(deps, IDs = deps$NOM_REGION)
   merged <- spChFIDs(merged, names(merged))
   ids <- names(merged)
   SpatialPolygonsDataFrame(
     merged,
     data = data.frame(nom_reg = ids, row.names = ids)
   )
 })


 ########################
 # LABEL POSITION LAYER #
 ########################

 asc <- TRUE  # NOTE(review): not read anywhere in the visible script
 out <- vector(mode = "list", length = nrow(r))
 for (i in seq_along(out)) {
   # Names and distances of the n syllables nearest to region i.
   # NOTE(review): row i of `r`/`nn` is paired with polygon i of `reg`; this
   # assumes both are in the same region order - confirm.
   # Loop-specific names are used so the commune-level `sylls` and `df`
   # built earlier are not overwritten.
   reg_sylls <- names(r)[nn$nn.index[i, ]]
   reg_dists <- nn$nn.dist[i, ]
   reg_df <- data.frame(
     sylls = reg_sylls,
     weights = reg_dists,
     region = reg$nom_reg[i]
   )

   # One candidate label position per syllable, sampled inside the region.
   pts <- spsample(reg[i, ], n = n, type = "nonaligned")

   # The sampler does not always return the requested count, so top up with
   # random points. The top-up is retried a bounded number of times because a
   # single extra draw may itself fall short.
   tries <- 0
   while (length(pts) < n && tries < 10) {
     extra <- spsample(reg[i, ], n = n - length(pts), type = "random")
     pts <- spRbind(extra, pts)
     tries <- tries + 1
   }
   if (length(pts) < n) {
     stop("could not sample ", n, " points in region ", i, call. = FALSE)
   }

   # Keep exactly n points (the sampler may also return more than n).
   out[[i]] <- SpatialPointsDataFrame(pts[1:n, ], data = reg_df)
 }

 # Stack the per-region layers into a single point layer.
 labelspt <- do.call("rbind", out)


 ###############
 # LABEL SIZES #
 ###############

 nCuts <- 6
 # Candidate text sizes, increasing from 1.2 to 1.5. There is one more size
 # than there are classes, presumably because findInterval() places the
 # largest weight on the last break.
 cexs <- seq(1.2, 1.5, length.out = nCuts + 1)

 # Per region: map each weight (distance) to a size through Jenks classes, so
 # larger weights get larger text. When all weights are equal there is
 # nothing to classify and a single size of 1.3 is used.
 szl <- lapply(out, function(x) {
   w <- x$weights
   if (length(unique(w)) != 1) {
     ints <- classIntervals(w, nCuts, style = "jenks")
     cexs[findInterval(w, ints$brks)]
   } else {
     rep(1.3, n)
   }
 })

 # Flatten to one size per label, in the same order as the stacked layers.
 szs <- unlist(szl)


 #################
 # LABEL COLOURS #
 #################

 # One rainbow colour per region polygon, picked for each label through the
 # integer code of its region.
 cols <- local({
   region_palette <- rainbow(nrow(reg))
   region_code <- as.integer(as.factor(labelspt$region))
   region_palette[region_code]
 })


 #######
 # MAP #
 #######

 coords <- coordinates(labelspt)

 # Empty canvas spanning the label points (no axes, no axis titles).
 plot(
   coords[, 1], coords[, 2],
   type = "n", axes = FALSE, xlab = NA, ylab = NA
 )

 # Lay the labels out from their sampled positions; columns 1-2 of the result
 # are used as a box origin and columns 3-4 as its extent, so the text is
 # drawn at the centre of each box.
 nc <- wordlayout(coords[, 1], coords[, 2], labelspt$sylls, cex = szs)
 text(
   nc[, 1] + 0.5 * nc[, 3],
   nc[, 2] + 0.5 * nc[, 4],
   labelspt$sylls,
   cex = szs, col = cols
 )

 # Region outlines on top, as faint dashed lines.
 plot(as(reg, "SpatialLines"), add = TRUE, col = rgb(.1, .1, .1, .1), lty = 2)
	library(rgdal)
	library(wordcloud)
	library(reshape)
	library(maptools)
	library(classInt)
	library(FactoMineR)
	library(FNN)

	### LOAD DATA ###
	# Read the two OGR layers from the current directory (dsn = "."):
	# `f` holds the COMMUNE layer, `deps` the DEPARTEMENT layer.
	f <- readOGR(dsn = ".", layer = "COMMUNE")
	deps <- readOGR(dsn = ".", layer = "DEPARTEMENT")


	##################################
	# DETECTION OF THE LAST SYLLABLE #
	##################################

	# Building blocks of the pattern: a vowel class, and a class for "anything
	# that is neither a vowel nor a hyphen" (so a match never spans a hyphen).
	vyl <- "[aeiouyîïéè]"
	csn <- "[^aeiouyîïéè-]"
	# Two-letter consonant groups that are kept together at the syllable start.
	# The alternation bars are written as plain "|": a backslash before the bar
	# is an unrecognised escape in an R string and stops the file from parsing.
	csnPlus <- "(tt|ss|mm|pp|nn|cc|rr|th|tr|gu|gn|gl|gr|gh|pl|ph|pr|br|bl|cr|cl|ch|vr|dr)"

	# Four alternatives, every one anchored at the end of the name ("$"):
	#   1. optional consonant, vowels, trailing consonants
	#   2. consonant group,    vowels, trailing consonants
	#   3. optional consonant, vowels, 1-2 consonants, final "e" + optional "s"
	#   4. consonant group,    vowels, 1-2 consonants, final "e" + optional "s"
	regles <- paste(
	  paste0("(", csn, "?", vyl, "+", csn, "*", "$", ")"),
	  paste0("(", csnPlus, vyl, "+", csn, "*", "$", ")"),
	  paste0("(", csn, "?", vyl, "+", csn, "{1,2}", "(e)[s]*$", ")"),
	  paste0("(", csnPlus, vyl, "+", csn, "{1,2}", "(e)[s]*$", ")"),
	  sep = "|"
	)

	# Lower-cased commune names; regexpr() returns the start of the leftmost
	# match, i.e. the position where the last syllable begins.
	txt <- tolower(f$NOM_COMM)
	pos <- regexpr(regles, txt)

	# substr() is vectorised over its arguments, so no index loop is needed and
	# an empty `txt` yields an empty result (1:length(txt) would iterate over
	# c(1, 0)). A name without any match has pos == -1; substr() treats a start
	# below 1 as 1, so the whole name is kept in that case.
	sylls <- substr(txt, pos, nchar(txt))


	##############################
	# SYLLABLE COUNTS PER REGION #
	##############################

	# One row per commune; the constant `value` of 1 turns the sum into a count.
	df <- data.frame(region = f$NOM_REGION, lastsyllabus = sylls, value = 1)

	# Cross-tabulation: one row per region, one column per last syllable.
	r <- cast(df, region ~ lastsyllabus, fun.aggregate = sum)

	# Move the region names out of the table and into the row names.
	rownames(r) <- r$region
	r$region <- NULL


	#################################
	# CORRESPONDENCE ANALYSIS (AFC) #
	#################################

	# Correspondence analysis of the region x syllable table, two dimensions kept.
	afc <- CA(r, ncp = 2)

	# Coordinates of the rows (regions) and columns (syllables) on axes 1 and 2.
	coordsReg <- afc$row$coord[, 1:2]
	coordsSyl <- afc$col$coord[, 1:2]

	# For every region, look up its n nearest syllables in that plane
	# (get.knnx() returns their indices and distances).
	n <- 50
	nn <- get.knnx(data = coordsSyl, query = coordsReg, k = n)


	################
	# REGION LAYER #
	################

	# Merge the department polygons by region name, reset the feature IDs to the
	# names of the merged object, then attach a one-column attribute table
	# (nom_reg) whose row names are those same names.
	reg <- local({
	  merged <- unionSpatialPolygons(deps, IDs = deps$NOM_REGION)
	  merged <- spChFIDs(merged, names(merged))
	  ids <- names(merged)
	  SpatialPolygonsDataFrame(
	    merged,
	    data = data.frame(nom_reg = ids, row.names = ids)
	  )
	})


	########################
	# LABEL POSITION LAYER #
	########################

	asc <- TRUE  # NOTE(review): not read anywhere in the visible script
	out <- vector(mode = "list", length = nrow(r))
	for (i in seq_along(out)) {
	  # Names and distances of the n syllables nearest to region i.
	  # NOTE(review): row i of `r`/`nn` is paired with polygon i of `reg`; this
	  # assumes both are in the same region order - confirm.
	  # Loop-specific names are used so the commune-level `sylls` and `df`
	  # built earlier are not overwritten.
	  reg_sylls <- names(r)[nn$nn.index[i, ]]
	  reg_dists <- nn$nn.dist[i, ]
	  reg_df <- data.frame(
	    sylls = reg_sylls,
	    weights = reg_dists,
	    region = reg$nom_reg[i]
	  )

	  # One candidate label position per syllable, sampled inside the region.
	  pts <- spsample(reg[i, ], n = n, type = "nonaligned")

	  # The sampler does not always return the requested count, so top up with
	  # random points. The top-up is retried a bounded number of times because a
	  # single extra draw may itself fall short.
	  tries <- 0
	  while (length(pts) < n && tries < 10) {
	    extra <- spsample(reg[i, ], n = n - length(pts), type = "random")
	    pts <- spRbind(extra, pts)
	    tries <- tries + 1
	  }
	  if (length(pts) < n) {
	    stop("could not sample ", n, " points in region ", i, call. = FALSE)
	  }

	  # Keep exactly n points (the sampler may also return more than n).
	  out[[i]] <- SpatialPointsDataFrame(pts[1:n, ], data = reg_df)
	}

	# Stack the per-region layers into a single point layer.
	labelspt <- do.call("rbind", out)


	###############
	# LABEL SIZES #
	###############

	nCuts <- 6
	# Candidate text sizes, increasing from 1.2 to 1.5. There is one more size
	# than there are classes, presumably because findInterval() places the
	# largest weight on the last break.
	cexs <- seq(1.2, 1.5, length.out = nCuts + 1)

	# Per region: map each weight (distance) to a size through Jenks classes, so
	# larger weights get larger text. When all weights are equal there is
	# nothing to classify and a single size of 1.3 is used.
	szl <- lapply(out, function(x) {
	  w <- x$weights
	  if (length(unique(w)) != 1) {
	    ints <- classIntervals(w, nCuts, style = "jenks")
	    cexs[findInterval(w, ints$brks)]
	  } else {
	    rep(1.3, n)
	  }
	})

	# Flatten to one size per label, in the same order as the stacked layers.
	szs <- unlist(szl)


	#################
	# LABEL COLOURS #
	#################

	# One rainbow colour per region polygon, picked for each label through the
	# integer code of its region.
	cols <- local({
	  region_palette <- rainbow(nrow(reg))
	  region_code <- as.integer(as.factor(labelspt$region))
	  region_palette[region_code]
	})


	#######
	# MAP #
	#######

	coords <- coordinates(labelspt)

	# Empty canvas spanning the label points (no axes, no axis titles).
	plot(
	  coords[, 1], coords[, 2],
	  type = "n", axes = FALSE, xlab = NA, ylab = NA
	)

	# Lay the labels out from their sampled positions; columns 1-2 of the result
	# are used as a box origin and columns 3-4 as its extent, so the text is
	# drawn at the centre of each box. The explicit `*` is required: `0.5nc`
	# is not valid R.
	nc <- wordlayout(coords[, 1], coords[, 2], labelspt$sylls, cex = szs)
	text(
	  nc[, 1] + 0.5 * nc[, 3],
	  nc[, 2] + 0.5 * nc[, 4],
	  labelspt$sylls,
	  cex = szs, col = cols
	)

	# Region outlines on top, as faint dashed lines.
	plot(as(reg, "SpatialLines"), add = TRUE, col = rgb(.1, .1, .1, .1), lty = 2)