cigrainger · August 17, 2014 22:51
diff --git a/gistfile1.py b/gistfile1.py
 import pandas as pd
 import pystan

 df = pd.read_csv('firms.csv')
 df = df.dropna()
 country = pd.Categorical(df.country)
 year = pd.Categorical(df.year)
 nace = pd.Categorical(df.nace)
 bvd_id = pd.Categorical(df.bvd_id)

 logit_code = """
 data {
  int<lower=0> N; 
  int<lower=0> n_year; 
  int<lower=0> n_country; 
  int<lower=0> n_nace;  
  int<lower=0,upper=n_year> year[N];
  int<lower=0,upper=n_country> country[N];
  int<lower=0,upper=n_nace> nace[N];
  vector[N] assets;
  vector[N] turnover;
  vector<lower=0>[N] stock;
  int<lower=0,upper=1> y[N];
 } 
 parameters {
  vector[n_year] b_year;
  vector[n_country] b_country;
  vector[n_nace] b_nace;
  vector[4] beta;
  real mu;
  real mu_year;
  real mu_firm;
  real mu_country;
  real mu_nace;
  real<lower=0,upper=100> sigma_year;
  real<lower=0,upper=100> sigma_country;
  real<lower=0,upper=100> sigma_nace;
 } 
 transformed parameters {
  vector[N] y_hat;

  for (i in 1:N)
    y_hat[i] <- beta[1] + beta[2]*assets[i] + beta[3]*turnover[i] + beta[4]*stock[i] + b_year[year[i]] + b_country[country[i]] + b_nace[nace[i]];
 }
 model {  
  b_year ~ normal (0, sigma_year);
  b_country ~ normal (0, sigma_country);
  b_nace ~ normal (0, sigma_nace);
  beta ~ normal (0, 100);
  y ~ bernoulli_logit(y_hat);
 }
 """

 regress_dat = {
 	'N' : len(df),
 	'y' : df.onepat.tolist(),
 	'assets' : df.assets.tolist(),
 	'turnover' : df.turnover.tolist(),
 	'stock' : df.stock.tolist(),
 	'n_year' : len(pd.unique(df.year)),
 	'n_country' : len(pd.unique(df.country)),
 	'n_nace' : len(pd.unique(df.nace)),
 	'year' : year.labels.tolist(),
 	'country' : country.labels.tolist(),
 	'nace' : nace.labels.tolist()
 }

 fit = pystan.stan(model_code=logit_code, data=regress_dat, iter=25000, chains=4, n_jobs=4)
	import pandas as pd
	import pystan

	df = pd.read_csv('firms.csv')
	df = df.dropna()
	country = pd.Categorical(df.country)
	year = pd.Categorical(df.year)
	nace = pd.Categorical(df.nace)
	bvd_id = pd.Categorical(df.bvd_id)

	logit_code = """
	data {
	int<lower=0> N;
	int<lower=0> n_year;
	int<lower=0> n_country;
	int<lower=0> n_nace;
	int<lower=0,upper=n_year> year[N];
	int<lower=0,upper=n_country> country[N];
	int<lower=0,upper=n_nace> nace[N];
	vector[N] assets;
	vector[N] turnover;
	vector<lower=0>[N] stock;
	int<lower=0,upper=1> y[N];
	}
	parameters {
	vector[n_year] b_year;
	vector[n_country] b_country;
	vector[n_nace] b_nace;
	vector[4] beta;
	real mu;
	real mu_year;
	real mu_firm;
	real mu_country;
	real mu_nace;
	real<lower=0,upper=100> sigma_year;
	real<lower=0,upper=100> sigma_country;
	real<lower=0,upper=100> sigma_nace;
	}
	transformed parameters {
	vector[N] y_hat;

	for (i in 1:N)
	y_hat[i] <- beta[1] + beta[2]assets[i] + beta[3]turnover[i] + beta[4]*stock[i] + b_year[year[i]] + b_country[country[i]] + b_nace[nace[i]];
	}
	model {
	b_year ~ normal (0, sigma_year);
	b_country ~ normal (0, sigma_country);
	b_nace ~ normal (0, sigma_nace);
	beta ~ normal (0, 100);
	y ~ bernoulli_logit(y_hat);
	}
	"""

	regress_dat = {
	'N' : len(df),
	'y' : df.onepat.tolist(),
	'assets' : df.assets.tolist(),
	'turnover' : df.turnover.tolist(),
	'stock' : df.stock.tolist(),
	'n_year' : len(pd.unique(df.year)),
	'n_country' : len(pd.unique(df.country)),
	'n_nace' : len(pd.unique(df.nace)),
	'year' : year.labels.tolist(),
	'country' : country.labels.tolist(),
	'nace' : nace.labels.tolist()
	}

	fit = pystan.stan(model_code=logit_code, data=regress_dat, iter=25000, chains=4, n_jobs=4)