import pymc as pm
import numpy as np
import pandas as pd
import arviz as az
import matplotlib.pyplot as plt
#dat = pd.read_csv('data1463_fin3.csv')
為了之後還可以跑這個模型,根據原本的資料重新模擬一筆資料。
(但這邊沒考慮到原本數值之間的相關情形) 已考慮進去!!用多元常態分配模擬了。
- acd1eap \(\sim N(2.8510193121856177e-05, 0.8921482479092293)\)
- scleap \(\sim N(1.9250494837216173e-06, 0.7154395945457549)\)
- c1 \(\sim \mathrm{Bernoulli}(0.19822282980177716)\)
- c2 \(\sim \mathrm{Bernoulli}(0.11483253588516747)\)
- c3 \(\sim \mathrm{Bernoulli}(0.11551606288448393)\)
'''
[dat.acd1_eap.mean(), dat.scl_eap.mean(),dat.c1.mean(),dat.c2.mean(),dat.c3.mean()]
np.cov([dat.acd1_eap, dat.scl_eap, dat.c1, dat.c2, dat.c3])
'''
'\n[dat.acd1_eap.mean(), dat.scl_eap.mean(),dat.c1.mean(),dat.c2.mean(),dat.c3.mean()]\nnp.cov([dat.acd1_eap, dat.scl_eap, dat.c1, dat.c2, dat.c3])\n'
# Re-simulate a dataset that matches the original data's first two moments,
# so the model below can still be run without the raw CSV.
# Means of [acd1_eap, scl_eap, c1, c2, c3], taken from the real data.
dat_mn = np.array([
    2.8510193121856177e-05,
    1.9250494837216173e-06,
    0.19822282980177716,
    0.11483253588516747,
    0.11551606288448393,
])

# Empirical covariance matrix of the same five variables (from np.cov on the
# original data), so the simulated draws preserve the between-variable
# correlations.
dat_cov = np.array([
    [ 0.7959285 ,  0.16304568, -0.01541732, -0.01689872, -0.02973076],
    [ 0.16304568,  0.51185381, -0.05390401, -0.00531492, -0.02327021],
    [-0.01541732, -0.05390401,  0.15903925, -0.022778  , -0.02291358],
    [-0.01689872, -0.00531492, -0.022778  ,  0.10171555, -0.01327408],
    [-0.02973076, -0.02327021, -0.02291358, -0.01327408,  0.10224199],
])

# Draw 1000 correlated observations, then dichotomize c1-c3 at 0.5
# (they are 0/1 indicators in the original data, simulated here as
# continuous and thresholded back to booleans).
dat = np.random.multivariate_normal(dat_mn, dat_cov, 1000)
dat = pd.DataFrame(dat, columns=['acd1_eap', 'scl_eap', 'c1', 'c2', 'c3'])
dat['c1'] = dat['c1'] > 0.5
dat['c2'] = dat['c2'] > 0.5
dat['c3'] = dat['c3'] > 0.5
dat
acd1_eap | scl_eap | c1 | c2 | c3 | |
---|---|---|---|---|---|
0 | 0.166286 | 0.234901 | False | False | False |
1 | -1.781805 | 0.507308 | False | False | False |
2 | 1.508147 | 0.059536 | False | False | False |
3 | -0.357505 | -0.018872 | True | False | False |
4 | -0.104632 | -0.378684 | False | False | True |
... | ... | ... | ... | ... | ... |
995 | -0.268559 | 0.530033 | False | True | False |
996 | -0.802137 | 0.281370 | True | False | False |
997 | 0.210101 | -0.957152 | False | False | True |
998 | -0.551066 | -0.537315 | False | False | False |
999 | -1.006819 | 0.682656 | False | False | False |
1000 rows × 5 columns
'''
dat_dict = {
'acd1_eap': np.random.normal(loc=2.8510193121856177e-05, scale=0.8921482479092293, size=1000),
'scl_eap': np.random.normal(loc=1.9250494837216173e-06, scale=0.7154395945457549, size=1000),
'c1': np.random.binomial(n=1, p=0.19822282980177716, size=1000),
'c2': np.random.binomial(n=1, p=0.11483253588516747, size=1000),
'c3': np.random.binomial(n=1, p=0.11551606288448393, size=1000),
}
dat = pd.DataFrame(dat_dict)
'''
"\ndat_dict = {\n 'acd1_eap': np.random.normal(loc=2.8510193121856177e-05, scale=0.8921482479092293, size=1000),\n 'scl_eap': np.random.normal(loc=1.9250494837216173e-06, scale=0.7154395945457549, size=1000),\n 'c1': np.random.binomial(n=1, p=0.19822282980177716, size=1000),\n 'c2': np.random.binomial(n=1, p=0.11483253588516747, size=1000),\n 'c3': np.random.binomial(n=1, p=0.11551606288448393, size=1000), \n}\ndat = pd.DataFrame(dat_dict)\n"
# Two linked linear regressions — looks like a mediation-style model
# (outcome acd1eap regressed on scleap + c1-c3; scleap regressed on c1-c3);
# the trace name `trace_med` suggests mediation — TODO confirm intent.
with pm.Model() as model:
    # Observed data containers.
    acd1eap = pm.ConstantData('acd1eap', dat.acd1_eap)
    scleap = pm.ConstantData('scleap', dat.scl_eap)
    c1 = pm.ConstantData('c1', dat.c1)
    c2 = pm.ConstantData('c2', dat.c2)
    c3 = pm.ConstantData('c3', dat.c3)

    # Intercepts (weakly informative wide normals).
    acd1eap_Intercept = pm.Normal('acd1eap_Intercept', mu=0, sigma=100)
    scleap_Intercept = pm.Normal('scleap_Intercept', mu=0, sigma=100)

    # Residual noise scales.
    acd1eap_Sigma = pm.HalfCauchy("acd1eap_Sigma", 1)
    scleap_Sigma = pm.HalfCauchy("scleap_Sigma", 1)

    # Regression slopes, named <outcome>_<predictor>.
    acd1eap_scleap = pm.Normal('acd1eap_scleap', mu=0, sigma=100)
    acd1eap_c1 = pm.Normal('acd1eap_c1', mu=0, sigma=100)
    acd1eap_c2 = pm.Normal('acd1eap_c2', mu=0, sigma=100)
    acd1eap_c3 = pm.Normal('acd1eap_c3', mu=0, sigma=100)
    scleap_c1 = pm.Normal('scleap_c1', mu=0, sigma=100)
    scleap_c2 = pm.Normal('scleap_c2', mu=0, sigma=100)
    scleap_c3 = pm.Normal('scleap_c3', mu=0, sigma=100)

    # Likelihoods: outcome equation and (presumed) mediator equation.
    pm.Normal(
        "y_likelihood",
        mu=acd1eap_Intercept + acd1eap_scleap * scleap
        + acd1eap_c1 * c1 + acd1eap_c2 * c2 + acd1eap_c3 * c3,
        sigma=acd1eap_Sigma,
        observed=acd1eap,
    )
    pm.Normal(
        'm_likelihood',
        mu=scleap_Intercept + scleap_c1 * c1 + scleap_c2 * c2 + scleap_c3 * c3,
        sigma=scleap_Sigma,
        observed=scleap,
    )

    # Sample the posterior: 4 chains x 2000 draws (plus NUTS auto-tuning).
    trace_med = pm.sample(2000, chains=4, cores=4)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [acd1eap_Intercept, scleap_Intercept, acd1eap_Sigma, scleap_Sigma, acd1eap_scleap, acd1eap_c1, acd1eap_c2, acd1eap_c3, scleap_c1, scleap_c2, scleap_c3]
Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 4 seconds.
100.00% [12000/12000 00:03<00:00 Sampling 4 chains, 0 divergences]
# Render the model's plate diagram (requires the graphviz package).
pm.model_to_graphviz(model)
# Posterior summary table: means, HDIs, MCSE, ESS and R-hat diagnostics.
az.summary(trace_med)
mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
---|---|---|---|---|---|---|---|---|---|
acd1eap_Intercept | 0.058 | 0.035 | -0.007 | 0.123 | 0.000 | 0.000 | 9706.0 | 6274.0 | 1.0 |
scleap_Intercept | 0.089 | 0.029 | 0.031 | 0.142 | 0.000 | 0.000 | 9752.0 | 6676.0 | 1.0 |
acd1eap_scleap | 0.328 | 0.037 | 0.260 | 0.398 | 0.000 | 0.000 | 12934.0 | 6525.0 | 1.0 |
acd1eap_c1 | -0.000 | 0.066 | -0.120 | 0.128 | 0.001 | 0.001 | 11839.0 | 6027.0 | 1.0 |
acd1eap_c2 | -0.125 | 0.082 | -0.282 | 0.029 | 0.001 | 0.001 | 12185.0 | 6373.0 | 1.0 |
acd1eap_c3 | -0.149 | 0.086 | -0.313 | 0.009 | 0.001 | 0.001 | 11086.0 | 5530.0 | 1.0 |
scleap_c1 | -0.342 | 0.056 | -0.444 | -0.234 | 0.001 | 0.000 | 10387.0 | 6201.0 | 1.0 |
scleap_c2 | -0.039 | 0.070 | -0.165 | 0.096 | 0.001 | 0.001 | 12429.0 | 6258.0 | 1.0 |
scleap_c3 | -0.221 | 0.073 | -0.355 | -0.080 | 0.001 | 0.000 | 12738.0 | 6714.0 | 1.0 |
acd1eap_Sigma | 0.850 | 0.019 | 0.814 | 0.885 | 0.000 | 0.000 | 14507.0 | 5681.0 | 1.0 |
scleap_Sigma | 0.722 | 0.016 | 0.691 | 0.752 | 0.000 | 0.000 | 11896.0 | 5736.0 | 1.0 |