import numpy as np
import pandas as pd
# Setting the parameters
n = 100 # Total number of participants
t = 3 # Number of time points
n_per_group = n // 2 # Number of participants per treatment condition
# Generating the time and treatment variables
time = np.tile([0, 1, 2], n) # Time points for each participant
tx = np.repeat([1, 0], repeats=n_per_group * t) # Treatment condition
# Coefficients for the model
beta_10 = 0 # Overall intercept for depression
beta_11 = 0 # Rate of change in depression for control
beta_12 = 0 # Mean difference in depression at baseline between CBT and control
beta_13 = -0.5 # Treatment effect for depression
beta_20 = 0 # Overall intercept for quality of life
beta_21 = 0 # Rate of change in quality of life for control
beta_22 = 0 # Mean difference in quality of life at baseline between CBT and control
beta_23 = 0 # Treatment effect for quality of life (none)
# Random effects variance-covariance matrix (multivariate model)
sigma_G = np.array([
[0.2, 0.05, 0.1, 0.05], # Covariances between u1, v1, u2, v2
[0.05, 0.2, 0.05, 0.1],
[0.1, 0.05, 0.2, 0.05],
[0.05, 0.1, 0.05, 0.2]
])
# Residual errors variance-covariance matrix
sigma_R = np.array([
[0.1, 0.05], # Covariance between e1 and e2
[0.05, 0.1]
])
# Simulating random effects
random_effects = np.random.multivariate_normal([0, 0, 0, 0], sigma_G, n)
# Simulating residual errors
residual_errors = np.random.multivariate_normal([0, 0], sigma_R, n * t)
# Calculating the outcomes
y1 = beta_10 + beta_11 * time + beta_12 * tx + beta_13 * time * tx + random_effects.repeat(t,
axis=0)[:, 0] + residual_errors[:, 0]
y2 = beta_20 + beta_21 * time + beta_22 * tx + beta_23 * time * tx + random_effects.repeat(t,
axis=0)[:, 2] + residual_errors[:, 1]
# Creating a DataFrame
data = pd.DataFrame({
'ParticipantID': np.repeat(np.arange(n), t),
'Time': time,
'Treatment': tx,
'Depression': y1,
'QualityOfLife': y2
})
data.head() # Displaying the first few rows of the dataset