Created
January 22, 2013 02:32
-
-
Save Vessy/4591583 to your computer and use it in GitHub Desktop.
Read .mp3 files from a given directory, compare songs, and play them in order of similarity
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read .mp3 files from a given directory, compare songs, and play them in
# order of similarity.

# plyr: ddply(); tuneR: readMP3() and play(); seewave: ama() and the
# *.dist() spectrum distances; compiler: cmpfun(); foreach + doMC: %dopar%.
library("plyr")
library("tuneR")
library("seewave")
library("compiler")
library("foreach")
library("doMC")

# Register the multicore backend so that %dopar% loops can run in parallel.
registerDoMC()
# Turn the merge matrix of a hierarchical clustering into a visiting order of
# the clustered observations.
#
# The merge tree is walked breadth-first, starting at the given merge step:
# after a step is handled, its not yet handled sub-clusters are queued first
# (second column before first column), then the step that merges it further.
# Every merge step is handled exactly once, so every observation appears
# exactly once in the result.
#
# Arguments:
#   x        - two-column merge matrix in the layout of hclust()$merge:
#              a negative entry -k is observation k, a positive entry j is
#              the cluster that was formed in row j.
#   indexHlp - row index (or indices) of x at which the walk starts;
#              defaults to the first merge step.
# Returns:
#   numeric vector with the observation indices in visiting order.
orderSongs <- function(x, indexHlp = 1)
{
  left <- x[, 1]
  right <- x[, 2]
  nMerges <- length(left)
  if (nMerges == 0)
    return(numeric(0))
  if (length(indexHlp) == 0 || anyNA(indexHlp) ||
      any(indexHlp < 1 | indexHlp > nMerges))
    stop("indexHlp must contain row indices of x between 1 and ", nMerges,
         call. = FALSE)

  visited <- logical(nMerges)
  # Each merge step contributes exactly two entries, so the size is known.
  pList <- numeric(2 * nMerges)
  nVisited <- 0
  queue <- indexHlp

  repeat {
    # A row may have been handled after it was queued; never handle it twice,
    # otherwise its observations would show up twice in the result.
    queue <- queue[!visited[queue]]
    if (length(queue) == 0) {
      # Only reachable when x is not one connected tree; continue with the
      # first row that is still open instead of failing on an empty queue.
      queue <- which(!visited)[1]
    }
    i <- queue[1]
    queue <- queue[-1]

    pList[2 * nVisited + 1:2] <- c(left[i], right[i])
    visited[i] <- TRUE
    nVisited <- nVisited + 1
    if (nVisited == nMerges)
      break

    # Sub-clusters of this step that still have to be handled.
    subClusters <- c(right[i], left[i])
    subClusters <- subClusters[subClusters > 0 & subClusters <= nMerges]
    subClusters <- subClusters[!visited[subClusters]]
    # The step that merges cluster i with something else, if still open.
    parent <- which((left == i | right == i) & !visited)
    queue <- c(queue, subClusters, parent)
  }

  # Negative entries are the observations; positive ones only link rows.
  -1 * pList[pList < 0]
}
# Read the .mp3 files of a directory, compare the songs, and play them in
# order of similarity.
#
# Arguments:
#   inDirectory - directory that is searched (not recursively) for .mp3 files.
#   startWith   - optional file name (without directory) of the song that is
#                 played first; NULL lets the clustering decide.
#   player      - external player program that is handed to play().
# Returns:
#   NULL, invisibly; the function is called for playing the songs.
arrangeMusic <- function(inDirectory, startWith = NULL,
                         player = "/usr/bin/mplayer")
{
  # Get a list of .mp3 files from the given directory.
  # The pattern is a regular expression: escape the dot and anchor the end,
  # otherwise any name that merely contains "<any character>mp3" matches.
  mList <- list.files(path = inDirectory, pattern = "\\.mp3$",
                      ignore.case = TRUE)
  if (length(mList) == 0)
    stop("No .mp3 files found in ", inDirectory, call. = FALSE)

  # Resolve the requested first song before the expensive work starts.
  startIdx <- NULL
  if (length(startWith) > 0) {
    startIdx <- match(startWith[1], mList)
    if (is.na(startIdx))
      stop("startWith is not one of the .mp3 files in ", inDirectory,
           call. = FALSE)
  }

  print("Reading and processing songs... Depending on the number of the songs, this part may take a few minutes...")
  # list.files() returned bare file names, so prepend the directory; reading
  # the bare names only works when inDirectory is the working directory.
  S <- lapply(file.path(inDirectory, mList), readMP3)

  # Clustering needs at least two songs; a single song is simply played.
  if (length(S) == 1) {
    play(S[[1]], player)
    return(invisible(NULL))
  }

  # Calculate the frequency spectra.
  # This takes some time, so it is parallelized.
  fa <- foreach(i = seq_along(S)) %dopar% {ama(S[[i]], plot = FALSE)}

  # Compare the spectra with several distance measures.
  # Distances are not symmetrical, e.g. dS1S2 != dS2S1, so all vs. all
  # distances are calculated (dSiSi is always 0 and could be skipped).
  simTab <- data.frame(V1 = rep(seq_along(mList), times = length(mList)),
                       V2 = rep(seq_along(mList), each = length(mList)))
  distS <- ddply(simTab, c("V1", "V2"), function(x) data.frame(
    IT = itakura.dist(fa[[x$V1]], fa[[x$V2]])$D1,
    KL = kl.dist(fa[[x$V1]], fa[[x$V2]])$D1,
    KS = ks.dist(fa[[x$V1]], fa[[x$V2]], f = 44100)$D,
    LS = logspec.dist(fa[[x$V1]], fa[[x$V2]])))

  # Normalize each distance by its maximum, then average the four of them.
  # A maximum of 0 means all distances are 0; keep them instead of producing
  # NaN through 0 / 0.
  scaleByMax <- function(v) {
    top <- max(v)
    if (!is.na(top) && top == 0) v else v / top
  }
  avgDist <- (scaleByMax(distS$IT) + scaleByMax(distS$KL) +
              scaleByMax(distS$KS) + scaleByMax(distS$LS)) / 4

  # Put the average distances in the matrix (row = V1, column = V2).
  simS <- matrix(nrow = length(S), ncol = length(S),
                 dimnames = list(mList, mList))
  simS[cbind(distS$V1, distS$V2)] <- avgDist

  # Now calculate distances between songs, using the default parameters,
  d <- dist(simS)
  # and use hierarchical clustering to cluster songs based on the distances.
  hc <- hclust(d)
  # In case we want to visualize the dendrogram:
  # plot(hc)

  # Get the play order from the hierarchical clustering.
  if (is.null(startIdx)) {
    playOrder <- orderSongsC(hc$merge)
  } else {
    # orderSongs() expects a row of the merge matrix, not a song index:
    # start at the merge step in which the requested song (stored as
    # -startIdx) joins the tree.
    startRow <- which(hc$merge[, 1] == -startIdx | hc$merge[, 2] == -startIdx)
    playOrder <- orderSongsC(hc$merge, startRow)
    # Within its merge step the song may be listed second; move it in front.
    playOrder <- c(startIdx, playOrder[playOrder != startIdx])
  }

  # Then play songs.
  for (i in seq_along(playOrder))
    play(S[[playOrder[i]]], player)
  invisible(NULL)
}
# Compile functions.
# arrangeMusic() calls orderSongsC(), so orderSongsC has to exist before
# arrangeMusicC() is run.
orderSongsC <- cmpfun(orderSongs)
arrangeMusicC <- cmpfun(arrangeMusic)

# An example: arrange and play the .mp3 files of the working directory.
arrangeMusicC(inDirectory = getwd())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment