Following @John Hedengren ideas I modified my code (Parameters estimation and least squares minimization - Python and Gekko) and added another set of data following the explanations from here:
How to set up GEKKO for parameter estimation from multiple independent sets of data?
However, I am not reaching a good solution, particularly for measurement 4. Have you got any ideas about how can I adjust it better? Thank you for your time and help.
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt
import math as math
import pandas as pd
#data set 1
t_data1 = [0.08333,0.5,1,4,22.61667]
x_data1 = [7.77e-5,7e-5,7e-5,6.22e-5,3.89e-5]
x_data1mgl = [4,3.6,3.6,3.2,2]
#data set 2
t_data4 = [0.08333,0.5,1,4,22.61667]
x_data4 = [2.92e-4,2.72e-4,2.92e-4,2.72e-4,2.14e-4]
x_data4mgl = [15,14,15,14,11]
#combine data and time in the same dataframe
data1 = pd.DataFrame({'time':t_data1,'x1':x_data1})
data4 = pd.DataFrame({'time':t_data4,'x4':x_data4})
data1.set_index('time', inplace=True)
data4.set_index('time', inplace=True)
# merge dataframes
data = data1.join(data4, how='outer')
# simulation time points
dftp = pd.DataFrame({'time':np.linspace(0,25,51)})
dftp.set_index('time', inplace=True)
# merge dataframes
df = data.join(dftp,how='outer')
# get True (1) or False (0) for measurement
#df['meas'] = (df['x'].values==df['x'].values).astype(int)
z1 = (df['x1']==df['x1']).astype(int)
z4 = (df['x4']==df['x4']).astype(int)
# replace NaN with zeros
df0 = df.fillna(value=0)
#Estimator Model
m = GEKKO(remote=True)
#m = GEKKO(remote=False) # remote=False to produce local folder with results
m.time = df.index.values
# measurements - Gekko arrays to store measured data
xm = m.Array(m.Param,2)
xm[0].value = df0['x1'].values
xm[1].value = df0['x4'].values
cl=m.Array(m.Var,2)
cl[0].value= 4e-5
cl[1].value= 1.33e-4
TOC=m.Array(m.Var,2)
TOC[0].value= 11.85
TOC[1].value= 11.85
C0=m.Array(m.Var,2)
C1=m.Array(m.Var,2)
cC1=m.Array(m.Var,2)
C2=m.Array(m.Var,2)
C2m=m.Array(m.Var,2)
cC2=m.Array(m.Var,2)
C3=m.Array(m.Var,2)
C4=m.Array(m.Var,2)
C5=m.Array(m.Var,2)
C6=m.Array(m.Var,2)
C7=m.Array(m.Var,2)
C8=m.Array(m.Var,2)
C9=m.Array(m.Var,2)
C10=m.Array(m.Var,2)
C11=m.Array(m.Var,2)
C12=m.Array(m.Var,2)
r1=m.Array(m.Var,2)
r2=m.Array(m.Var,2)
r3=m.Array(m.Var,2)
r4=m.Array(m.Var,2)
r5=m.Array(m.Var,2)
r6=m.Array(m.Var,2)
r7=m.Array(m.Var,2)
r8=m.Array(m.Var,2)
r9=m.Array(m.Var,2)
r10=m.Array(m.Var,2)
r11=m.Array(m.Var,2)
r12=m.Array(m.Var,2)
k5=m.Array(m.Var,2)
#Define GEKKO variables that determine if time point contains data to be used in regression
#index for objective (0=not measured, 1=measured)
zm = m.Array(m.Param,2)
zm[0].value=z1
zm[1].value=z4
# fit to measurement
x=m.Array(m.Var,2)
x[0].value=x_data1[0]
x[1].value=x_data4[0]
#adjustable parameters
kdoc1 = 0 #m-1h-1
kdoc2 = m.FV(1,lb=0.01,ub=10) #m-1h-1
S1 = 0
S2 = m.FV(1,lb=0.01,ub=10)
#Define Variables
for i in range(2):
#State variables
# M (mol/L)
#C0 = m.Var(cl[i])
C0[i] = m.Var(value=cl[i],lb=0,ub=1e-3)
C1[i] = m.Var(value=6.9e-6,lb=0,ub=2e-3)
C2[i] = m.Var(value=x[i],lb=0, ub=1e-3)
C2m[i] = m.Param(xm[i],lb=0)
meas = m.Param(zm[i])
m.Minimize(meas*((C2[i]-C2m[i]))**2)
C3[i] = m.Var(value=0)
C4[i] = m.Var(value=3.16e-8)
C5[i] = m.Var(value=3.83e-7)
C6[i] = m.Var(value=0)
C7[i]= m.Var(value=0)
C8[i] = m.Var(value=0)
C9[i] = m.Var(value=0)
C10[i] = m.Var(value=0)
C11[i] = m.Var(value=0)
C12[i] = m.Var(value=3.21e-3)
TOC[i] = m.Var(value=5.93)#mg-C/L
cC1[i] = m.Var(value=0)
cC2[i] = m.Var(value=0)
#temperature oC
T = 20
#Rate constants
k1 = 2040000000*math.exp(-1887/(273.15+T))*3600
k2 = 138000000*math.exp(-8800/(273.15+T))*3600
k3 = 300000*math.exp(-2010/(273.15+T))*3600
k4 = 0.00000065*3600
k6 = 26700*3600
k7 = 167*3600
k8 = 27700*3600
k9 = 8300*3600
k10 = 0
k5[i] = 37800000000*math.exp(-2169/(273.15+T))*C4[i]
+2.52e25*math.exp(-16860/(273.15+T))*C10[i]
+0.87*math.exp(-503/(273.15+T))*C9[i]
r1[i] = k1 * C0[i] * C1[i]
r2[i] = k2 * C2[i]
r3[i] = k3 * C0[i] * C2[i]
r4[i] = k4 * C3[i]
r5[i] = k5[i] * C2[i] * C2[i]
r6[i] = k6 * C3[i] * C1[i]* C4[i]
r7[i] = k7 * C3[i] * C5[i]
r8[i] = k8 * C6[i] * C3[i]
r9[i] = k9 * C6[i] * C2[i]
r10[i] = k10 * C2[i] * C3[i]
r11[i] = kdoc1*S1*TOC[i]*C2[i]/12000
r12[i] = kdoc2*S2*TOC[i]*C0[i]/12000
t = m.Param(value=m.time)
m.Equation(C0[i].dt()== -r1[i] + r2[i] - r3[i] + r4[i] + r8[i] - r12[i])
m.Equation(C1[i].dt()== -r1[i] + r2[i] + r5[i] - r6[i] + r11[i])
m.Equation(C2[i].dt()== r1[i] - r2[i] - r3[i] + r4[i] - r5[i] + r6[i] - r9[i] - r10[i] - r11[i])
m.Equation(C3[i].dt()== r3[i] - r4[i] + r5[i] - r6[i] - r7[i] - r8[i] - r10[i])
m.Equation(C4[i].dt()== 0)
m.Equation(C5[i] == 1e-14/C4[i])
m.Equation(C6[i].dt()== r7[i] - r8[i] - r9[i])
m.Equation(C7[i] == (3.16e-8*C0[i])/C4[i])
m.Equation(C1[i] == (5.01e-10*C8[i])/C4[i])
m.Equation(C11[i] == (5.01e-11*C10[i])/C4[i])
m.Equation(C9[i] == (5.01e-7*C9[i])/C4[i])
m.Equation(C10[i] == C12[i] - 2*C11[i] - C5[i] + C4[i])
m.Equation(C12[i].dt()== 0)
m.Equation(TOC[i].dt()== 0)
m.Equation(cC1[i] == 17000*C1[i])
m.Equation(cC2[i] == 51500*C2[i])
m.Equation(S1+S2<1)
#Application options
m.options.SOLVER = 3 #IPOPT solver
m.options.IMODE = 5 #Dynamic Simultaneous - estimation = MHE
m.options.EV_TYPE = 2 #absolute error
m.options.NODES = 3 #collocation nodes (2,5)
if True:
#kdoc1.STATUS=1
kdoc2.STATUS=1
#S1.STATUS=1
S2.STATUS=1
#Solve
#m.open_folder() # open folder if remote=False to see infeasibilities.txt
m.solve(disp=True)
print('Final SSE Objective: ' + str(m.options.objfcnval))
print('Solution')
#print('kdoc1 = ' + str(kdoc1.value[0]))
print('kdoc2 = ' + str(kdoc2.value[0]))
#print('S1 = ' + str(S1.value[0]))
print('S2 = ' + str(S2.value[0]))
plt.figure(1,figsize=(8,5))
plt.subplot(2,1,1)
plt.plot(m.time,C2[0],'b.--',label='Predicted 1')
plt.plot(m.time,C2[1],'r.--',label='Predicted 4')
plt.plot(t_data1,x_data1,'bx',label='Meas 1')
plt.plot(t_data4,x_data4,'rx',label='Meas 4')
plt.legend(loc='best')
plt.subplot(2,1,2)
plt.plot(m.time,cC2[0].value,'b',label ='C2_M1')
plt.plot(m.time,cC2[1].value,'r',label ='C2_M4')
plt.plot(t_data1,x_data1mgl,'bx',label='Meas 1')
plt.plot(t_data4,x_data4mgl,'rx',label='Meas 4')
#plt.plot(m.time,cC1.value,label ='C1')
plt.legend(loc='best')
plt.xlabel('time (h)')
plt.figure(2,figsize=(12,8))
plt.subplot(4,3,1)
plt.plot(m.time,C0[i].value,label ='C0')
plt.legend()
plt.subplot(4,3,2)
plt.plot(m.time,C1[i].value,label ='C1')
plt.legend()
plt.subplot(4,3,3)
plt.plot(m.time,C3[i].value,label ='C3')
plt.legend()
plt.subplot(4,3,4)
plt.plot(m.time,C4[i].value,label ='C4')
plt.legend()
plt.subplot(4,3,5)
plt.plot(m.time,C5[i].value,label ='C5')
plt.legend()
plt.subplot(4,3,6)
plt.plot(m.time,C6[i].value,label ='C6')
plt.legend()
plt.subplot(4,3,7)
plt.plot(m.time,C7[i].value,label ='C7')
plt.legend()
plt.subplot(4,3,8)
plt.plot(m.time,C8[i].value,label ='C8')
plt.legend()
plt.subplot(4,3,9)
plt.plot(m.time,C9[i].value,label ='C9')
plt.legend()
plt.xlabel('time (h)')
plt.subplot(4,3,10)
plt.plot(m.time,C10[i].value,label ='C10')
plt.legend()
plt.xlabel('time (h)')
plt.subplot(4,3,11)
plt.plot(m.time,C11[i].value,label ='C11')
plt.legend()
plt.xlabel('time (h)')
plt.subplot(4,3,12)
plt.plot(m.time,C12[i].value,label ='C12')
plt.legend()
plt.xlabel('time (h)')
plt.show()
question from:
https://stackoverflow.com/questions/65852474/parameters-estimation-with-multiple-data-sets-using-gekko