import pymc as pm
import numpy as np
import pandas as pd
import arviz as az
import matplotlib.pyplot as plt

# dat = pd.read_csv('data1463_fin3.csv')
# So that this model can still be run later, we re-simulate a dataset based on
# the original data. (An earlier version did not account for the correlations
# among the original variables — this is now handled: the data are simulated
# from a multivariate normal distribution.)
- acd1eap \(\sim N(2.8510193121856177 \times 10^{-5},\; 0.8921482479092293)\)
- scleap \(\sim N(1.9250494837216173 \times 10^{-6},\; 0.7154395945457549)\)
- c1 \(\sim \mathrm{Bernoulli}(0.19822282980177716)\)
- c2 \(\sim \mathrm{Bernoulli}(0.11483253588516747)\)
- c3 \(\sim \mathrm{Bernoulli}(0.11551606288448393)\)
 
'''
[dat.acd1_eap.mean(), dat.scl_eap.mean(),dat.c1.mean(),dat.c2.mean(),dat.c3.mean()]
np.cov([dat.acd1_eap, dat.scl_eap, dat.c1, dat.c2, dat.c3])
''''\n[dat.acd1_eap.mean(), dat.scl_eap.mean(),dat.c1.mean(),dat.c2.mean(),dat.c3.mean()]\nnp.cov([dat.acd1_eap, dat.scl_eap, dat.c1, dat.c2, dat.c3])\n'
# Population moments estimated from the original data ('data1463_fin3.csv'):
# means and covariance of [acd1_eap, scl_eap, c1, c2, c3]
# (see the commented-out `dat.*.mean()` / `np.cov(...)` calls above).
dat_mn = np.array(
    [2.8510193121856177e-05,
     1.9250494837216173e-06,
     0.19822282980177716,
     0.11483253588516747,
     0.11551606288448393]
)
dat_cov = np.array([
    [ 0.7959285 ,  0.16304568, -0.01541732, -0.01689872, -0.02973076],
    [ 0.16304568,  0.51185381, -0.05390401, -0.00531492, -0.02327021],
    [-0.01541732, -0.05390401,  0.15903925, -0.022778  , -0.02291358],
    [-0.01689872, -0.00531492, -0.022778  ,  0.10171555, -0.01327408],
    [-0.02973076, -0.02327021, -0.02291358, -0.01327408,  0.10224199]
])

# Draw 1000 joint samples so the correlation structure among the original
# variables is preserved (multivariate normal rather than independent draws).
# NOTE(review): no random seed is set, so every run yields a different sample.
dat = np.random.multivariate_normal(dat_mn, dat_cov, 1000)
dat = pd.DataFrame(dat, columns=['acd1_eap', 'scl_eap', 'c1', 'c2', 'c3'])

# The c* columns are binary indicators in the original data; dichotomize the
# continuous Gaussian draws at 0.5 to recover boolean indicators.
# (The last line of this cell was fused with a notebook preview table in the
# export; that stray output text has been removed.)
dat['c1'] = dat['c1'] > 0.5
dat['c2'] = dat['c2'] > 0.5
dat['c3'] = dat['c3'] > 0.5
|---|---|---|---|---|---|
| 0 | 0.166286 | 0.234901 | False | False | False | 
| 1 | -1.781805 | 0.507308 | False | False | False | 
| 2 | 1.508147 | 0.059536 | False | False | False | 
| 3 | -0.357505 | -0.018872 | True | False | False | 
| 4 | -0.104632 | -0.378684 | False | False | True | 
| ... | ... | ... | ... | ... | ... | 
| 995 | -0.268559 | 0.530033 | False | True | False | 
| 996 | -0.802137 | 0.281370 | True | False | False | 
| 997 | 0.210101 | -0.957152 | False | False | True | 
| 998 | -0.551066 | -0.537315 | False | False | False | 
| 999 | -1.006819 | 0.682656 | False | False | False | 
1000 rows × 5 columns
'''
# NOTE(review): alternative simulation that draws each column independently
# from its marginal distribution (Normal / Binomial(1, p)), ignoring the
# covariance used above. It appears superseded by the multivariate-normal
# simulation — if executed, it silently re-assigns `dat`; confirm which
# version is intended to be live.
dat_dict = {
    'acd1_eap': np.random.normal(loc=2.8510193121856177e-05, scale=0.8921482479092293, size=1000),
    'scl_eap': np.random.normal(loc=1.9250494837216173e-06, scale=0.7154395945457549, size=1000),
    'c1': np.random.binomial(n=1, p=0.19822282980177716, size=1000),
    'c2': np.random.binomial(n=1, p=0.11483253588516747, size=1000),
    'c3': np.random.binomial(n=1, p=0.11551606288448393, size=1000),    
}
dat = pd.DataFrame(dat_dict)
'''"\ndat_dict = {\n    'acd1_eap': np.random.normal(loc=2.8510193121856177e-05, scale=0.8921482479092293, size=1000),\n    'scl_eap': np.random.normal(loc=1.9250494837216173e-06, scale=0.7154395945457549, size=1000),\n    'c1': np.random.binomial(n=1, p=0.19822282980177716, size=1000),\n    'c2': np.random.binomial(n=1, p=0.11483253588516747, size=1000),\n    'c3': np.random.binomial(n=1, p=0.11551606288448393, size=1000),    \n}\ndat = pd.DataFrame(dat_dict)\n"
# Bayesian regression model over the simulated data.
# Two linear likelihoods share the binary covariates c1..c3:
#   acd1eap ~ Intercept + slope*scleap + slopes*c1..c3
#   scleap  ~ Intercept + slopes*c1..c3
# NOTE(review): scleap appears both as a predictor of acd1eap and as an
# outcome of c1..c3 — presumably a mediation model (hence `trace_med`);
# confirm with the author.
with pm.Model() as model:
    # Observed data registered as constant data containers in the graph.
    acd1eap = pm.ConstantData('acd1eap', dat.acd1_eap)
    scleap = pm.ConstantData('scleap', dat.scl_eap)
    c1 = pm.ConstantData('c1', dat.c1)
    c2 = pm.ConstantData('c2', dat.c2)
    c3 = pm.ConstantData('c3', dat.c3)

    # Intercepts: weakly-informative wide normals.
    acd1eap_Intercept = pm.Normal('acd1eap_Intercept', mu=0, sigma=100)
    scleap_Intercept = pm.Normal('scleap_Intercept', mu=0, sigma=100)

    # Residual noise scales (half-Cauchy, scale 1).
    acd1eap_Sigma = pm.HalfCauchy("acd1eap_Sigma", 1)
    scleap_Sigma = pm.HalfCauchy("scleap_Sigma", 1)

    # Regression slopes, named <outcome>_<predictor>.
    acd1eap_scleap = pm.Normal('acd1eap_scleap', mu=0, sigma=100)
    acd1eap_c1 = pm.Normal('acd1eap_c1', mu=0, sigma=100)
    acd1eap_c2 = pm.Normal('acd1eap_c2', mu=0, sigma=100)
    acd1eap_c3 = pm.Normal('acd1eap_c3', mu=0, sigma=100)
    scleap_c1 = pm.Normal('scleap_c1', mu=0, sigma=100)
    scleap_c2 = pm.Normal('scleap_c2', mu=0, sigma=100)
    scleap_c3 = pm.Normal('scleap_c3', mu=0, sigma=100)

    # Likelihoods for the two outcomes.
    pm.Normal("y_likelihood",
              mu=acd1eap_Intercept + acd1eap_scleap * scleap
                 + acd1eap_c1 * c1 + acd1eap_c2 * c2 + acd1eap_c3 * c3,
              sigma=acd1eap_Sigma, observed=acd1eap)
    pm.Normal('m_likelihood',
              mu=scleap_Intercept + scleap_c1 * c1 + scleap_c2 * c2
                 + scleap_c3 * c3,
              sigma=scleap_Sigma, observed=scleap)

    # Draw posterior samples. (The NUTS progress log that was fused onto
    # this line in the notebook export has been removed.)
    trace_med = pm.sample(2000, chains=4, cores=4)
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [acd1eap_Intercept, scleap_Intercept, acd1eap_Sigma, scleap_Sigma, acd1eap_scleap, acd1eap_c1, acd1eap_c2, acd1eap_c3, scleap_c1, scleap_c2, scleap_c3]
Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 4 seconds.
      
      100.00% [12000/12000 00:03<00:00 Sampling 4 chains, 0 divergences]
    
    
# Post-fit diagnostics. (These two calls were fused with the az.summary
# output table in the notebook export; split back into statements.)
pm.model_to_graphviz(model)  # render the model's graphical structure
az.summary(trace_med)        # posterior summary: means, HDIs, ESS, r_hat
|---|---|---|---|---|---|---|---|---|---|
| acd1eap_Intercept | 0.058 | 0.035 | -0.007 | 0.123 | 0.000 | 0.000 | 9706.0 | 6274.0 | 1.0 | 
| scleap_Intercept | 0.089 | 0.029 | 0.031 | 0.142 | 0.000 | 0.000 | 9752.0 | 6676.0 | 1.0 | 
| acd1eap_scleap | 0.328 | 0.037 | 0.260 | 0.398 | 0.000 | 0.000 | 12934.0 | 6525.0 | 1.0 | 
| acd1eap_c1 | -0.000 | 0.066 | -0.120 | 0.128 | 0.001 | 0.001 | 11839.0 | 6027.0 | 1.0 | 
| acd1eap_c2 | -0.125 | 0.082 | -0.282 | 0.029 | 0.001 | 0.001 | 12185.0 | 6373.0 | 1.0 | 
| acd1eap_c3 | -0.149 | 0.086 | -0.313 | 0.009 | 0.001 | 0.001 | 11086.0 | 5530.0 | 1.0 | 
| scleap_c1 | -0.342 | 0.056 | -0.444 | -0.234 | 0.001 | 0.000 | 10387.0 | 6201.0 | 1.0 | 
| scleap_c2 | -0.039 | 0.070 | -0.165 | 0.096 | 0.001 | 0.001 | 12429.0 | 6258.0 | 1.0 | 
| scleap_c3 | -0.221 | 0.073 | -0.355 | -0.080 | 0.001 | 0.000 | 12738.0 | 6714.0 | 1.0 | 
| acd1eap_Sigma | 0.850 | 0.019 | 0.814 | 0.885 | 0.000 | 0.000 | 14507.0 | 5681.0 | 1.0 | 
| scleap_Sigma | 0.722 | 0.016 | 0.691 | 0.752 | 0.000 | 0.000 | 11896.0 | 5736.0 | 1.0 |