dmbates · May 3, 2022 18:04
diff --git a/Issue611.ipynb b/Issue611.ipynb
diff --git a/Issue611.qmd b/Issue611.qmd
 ---
 title: "Issue611"
 author: "Douglas Bates"
 jupyter: julia-1.7
 date: today
 date-format: iso
 execute:
  keep-ipynb: true
 ---

 ## Load packages and data

 ```{julia}
 using CSV, MixedModels, ProgressMeter, Tables
 ProgressMeter.ijulia_behavior(:clear);
 ```

 Recent developments in Julia make it convenient to name the weights vector `wts`, which is the name of the named argument in the `LinearMixedModel` constructor,
 ```{julia}
 dat = CSV.read("./data/model_ready_data_small.csv", columntable)
 wts = CSV.read("./data/weights.csv", columntable).weight_vector
 ```

 Also, assign a `contrasts` specification of `Grouping` to the grouping factors
 ```{julia}
 contrasts = Dict(
    :brand_category_channel_type => Grouping(),
    :channel_type_sub_brand => Grouping(),
 );
 ```

 With the `Grouping` contrast there is no need to change from an Integer to a Categorical type.
 In general I would recommend prepending a character to the number in the CSV file so that the column is read as a String or a `PooledArray`, which is like a Categorical array.

 For example,

 ```{julia}
 brand = string.('B', lpad.(dat.brand_category_channel_type, 2, '0'))
 ```

 Recently I do any permanent storage of data frames as arrow files (see [Arrow.jl](https://github.com/apache/arrow-julia)) because they retain the metadata and you don't need to go through the conversion from CSV to an internal representation followed by transformations.

 ## Initial model fits

 Before going to vector-valued random effects I would try just random intercepts to see what kinds of fits they produce.
 I write this now as a let block so that I can define the formula but it doesn't clutter up the namespace.

 ```{julia}
 m1 = let
    form = @formula(
        volume ~ 1 + ppi + base_price +
        (1|channel_type_sub_brand)
    )
    fit(MixedModel, form, dat; contrasts, wts)
 end
 ```

 As you can see, it converges on the boundary with a variance component of zero.

 ```{julia}
 println(m1)
 ```


 ```{julia}
 m2 = let
    form = @formula(
        volume ~ 1 + ppi + base_price +
        zerocorr(1 + ppi | channel_type_sub_brand)
    )
    fit(MixedModel, form, dat; contrasts, wts)
 end
 ```

 ```{julia}
 m2a = let
    form = @formula(
        volume ~ 1 + ppi + base_price +
        (0 + ppi | channel_type_sub_brand)
    )
    fit(MixedModel, form, dat; contrasts, wts)
 end
 println(m2a)
 ```


 ```{julia}
 println(m2)
 ```

 ```{julia}
 m3 = let
    form = @formula(
        volume ~ 1 + ppi + base_price +
        (1 | channel_type_sub_brand) +
        (1 | brand_category_channel_type)
    )
    fit(MixedModel, form, dat; contrasts, wts)
 end
 ```

 ```{julia}
 m4 = let
    form = @formula(
        volume ~ 1 + ppi + base_price +
        (1 | channel_type_sub_brand) +
        zerocorr(1 + ppi | brand_category_channel_type)
    )
    LinearMixedModel(form, dat; contrasts, wts)
 end
 try
    updateL!(m4)
 catch
    @warn "Cholesky factorization failed"
 end
 ```

 ```{julia}
 m5 = let
    form = @formula(
        volume ~ 1 + ppi + base_price +
        (0 + ppi | channel_type_sub_brand) +
        (0 + ppi | brand_category_channel_type)
    )
    fit(MixedModel, form, dat; contrasts, wts)
 end
 ```


 ```{julia}
 println(m5)
 ```

 So the model seems to come down to a random effect for the slope w.r.t. `ppi` by `channel_type_sub_brand`, model `m2a`, and not much else.
diff --git a/Project.toml b/Project.toml
 [deps]
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
 Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 MixedModels = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"
 ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
	---
	title: "Issue611"
	author: "Douglas Bates"
	jupyter: julia-1.7
	date: today
	date-format: iso
	execute:
	keep-ipynb: true
	---

	## Load packages and data

	```{julia}
	using CSV, MixedModels, ProgressMeter, Tables
	ProgressMeter.ijulia_behavior(:clear);
	```

	Recent developments in Julia make it convenient to name the weights vector `wts`, which is the name of the named argument in the `LinearMixedModel` constructor,
	```{julia}
	dat = CSV.read("./data/model_ready_data_small.csv", columntable)
	wts = CSV.read("./data/weights.csv", columntable).weight_vector
	```

	Also, assign a `contrasts` specification of `Grouping` to the grouping factors
	```{julia}
	contrasts = Dict(
	:brand_category_channel_type => Grouping(),
	:channel_type_sub_brand => Grouping(),
	);
	```

	With the `Grouping` contrast there is no need to change from an Integer to a Categorical type.
	In general I would recommend prepending a character to the number in the CSV file so that the column is read as a String or a `PooledArray`, which is like a Categorical array.

	For example,

	```{julia}
	brand = string.('B', lpad.(dat.brand_category_channel_type, 2, '0'))
	```

	Recently I do any permanent storage of data frames as arrow files (see [Arrow.jl](https://github.com/apache/arrow-julia)) because they retain the metadata and you don't need to go through the conversion from CSV to an internal representation followed by transformations.

	## Initial model fits

	Before going to vector-valued random effects I would try just random intercepts to see what kinds of fits they produce.
	I write this now as a let block so that I can define the formula but it doesn't clutter up the namespace.

	```{julia}
	m1 = let
	form = @formula(
	volume ~ 1 + ppi + base_price +
	(1\|channel_type_sub_brand)
	)
	fit(MixedModel, form, dat; contrasts, wts)
	end
	```

	As you can see, it converges on the boundary with a variance component of zero.

	```{julia}
	println(m1)
	```


	```{julia}
	m2 = let
	form = @formula(
	volume ~ 1 + ppi + base_price +
	zerocorr(1 + ppi \| channel_type_sub_brand)
	)
	fit(MixedModel, form, dat; contrasts, wts)
	end
	```

	```{julia}
	m2a = let
	form = @formula(
	volume ~ 1 + ppi + base_price +
	(0 + ppi \| channel_type_sub_brand)
	)
	fit(MixedModel, form, dat; contrasts, wts)
	end
	println(m2a)
	```


	```{julia}
	println(m2)
	```

	```{julia}
	m3 = let
	form = @formula(
	volume ~ 1 + ppi + base_price +
	(1 \| channel_type_sub_brand) +
	(1 \| brand_category_channel_type)
	)
	fit(MixedModel, form, dat; contrasts, wts)
	end
	```

	```{julia}
	m4 = let
	form = @formula(
	volume ~ 1 + ppi + base_price +
	(1 \| channel_type_sub_brand) +
	zerocorr(1 + ppi \| brand_category_channel_type)
	)
	LinearMixedModel(form, dat; contrasts, wts)
	end
	try
	updateL!(m4)
	catch
	@warn "Cholesky factorization failed"
	end
	```

	```{julia}
	m5 = let
	form = @formula(
	volume ~ 1 + ppi + base_price +
	(0 + ppi \| channel_type_sub_brand) +
	(0 + ppi \| brand_category_channel_type)
	)
	fit(MixedModel, form, dat; contrasts, wts)
	end
	```


	```{julia}
	println(m5)
	```

	So the model seems to come down to a random effect for the slope w.r.t. `ppi` by `channel_type_sub_brand`, model `m2a`, and not much else.
	[deps]
	CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
	CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
	Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"
	DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
	MixedModels = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"
	ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
	StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
	Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"