Skip to content

Instantly share code, notes, and snippets.

@mwacc
Created November 2, 2013 19:09
Show Gist options
  • Select an option

  • Save mwacc/7282337 to your computer and use it in GitHub Desktop.

Select an option

Save mwacc/7282337 to your computer and use it in GitHub Desktop.
sampler for data population
# ---- Generator configuration ----

# Fixed seed so repeated runs draw the same random stream.
set.seed(100)

# Number of rows to generate, and the upper bounds for user and movie ids.
N <- 1e7
USER_NUM <- 25000
MOVIE_NUM <- 10000

# Candidate values for the categorical fields. Some values are listed more
# than once.
partOfDay <- c(1, 2, 3, 3, 4, 4, 4)
isRainOrSnow <- c(0, 0, 0, 1)
isRainOfSnowBefore <- c(0, 0, 0, 1)
temperature <- c(0, 1, 2, 3, 4)
cameFrom <- c(0, 1, 2, 3, 4, 5, 6, 7)

# Pre-drawn normal samples, one per row. The three draws are kept in this
# order (day of week, price, age) so each vector receives the same values
# from the seeded stream.
dayOfWeek <- rnorm(n = N, mean = 6, sd = 2)
price <- rnorm(n = N, mean = 5, sd = 2)
age <- rnorm(n = N, mean = 30, sd = 14)
# Build one comma-separated sample record.
#
# Fields, in order: user id, movie id, day of week, part of day,
# rain/snow flag, earlier rain/snow flag, temperature, came-from code,
# price, age.
#
# Args:
#   i: Row counter. Selects, with wrap-around, which pre-drawn dayOfWeek,
#      price and age value this row uses.
#
# Returns:
#   A length-one character vector holding the ten fields joined by ",".
#
# Reads the globals USER_NUM, MOVIE_NUM, dayOfWeek, partOfDay, isRainOrSnow,
# isRainOfSnowBefore, temperature, cameFrom, price and age.
getRow <- function(i) {
  # Draw one element of a vector. Indexing through sample.int() avoids the
  # sample() pitfall where a single number n >= 1 is treated as "draw from
  # 1:n" instead of "return n".
  pickOne <- function(values) {
    values[sample.int(length(values), 1)]
  }

  # floor(runif(1, 1, n + 1)) is a uniform integer in 1..n.
  userid <- floor(runif(1, 1, USER_NUM + 1))
  movieid <- floor(runif(1, 1, MOVIE_NUM + 1))

  # Round first, then wrap, so the result is always in 0..6. Rounding after
  # the modulo could produce 7 for values in [6.5, 7).
  weekDay <- round(dayOfWeek[i %% length(dayOfWeek) + 1]) %% 7

  dayPart <- pickOne(partOfDay)
  rainNow <- pickOne(isRainOrSnow)
  rainBefore <- pickOne(isRainOfSnowBefore)
  temp <- pickOne(temperature)
  origin <- pickOne(cameFrom)

  rowPrice <- round(price[i %% length(price) + 1], digits = 2)

  # Pre-drawn ages below 18 are replaced by a fresh uniform draw in [18, 27].
  rowAge <- age[i %% length(age) + 1]
  if (isTRUE(rowAge < 18)) {
    rowAge <- runif(1, 18, 27)
  }

  paste(
    userid, movieid, weekDay, dayPart, rainNow, rainBefore, temp, origin,
    rowPrice, round(rowAge),
    sep = ","
  )
}
# Append N generated rows, one per line, to the output file.
# Mode "at" opens the file for appending in text mode, so existing content
# is kept.
fileConn <- file("/home/kostya/data04.txt", open = "at")
tryCatch(
  {
    # seq_len() yields an empty sequence when N is 0, unlike 1:N.
    for (i in seq_len(N)) {
      writeLines(getRow(i), fileConn)
      # cat(getRow(i), sep = "\n")
    }
  },
  # Close the connection even if generating or writing a row fails.
  finally = close(fileConn)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment