I am trying to develop a Neural network regression model using spark pipeline and elephas estimator. After developing model getting the following error. Most of the work using elephas in classification problem. Please suggest some help
model = Sequential()
model.add(Dense(512, input_shape=(input_dim,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('linear'))
model.compile(optimizer='adam', loss='mean_absolute_error')
sgd = optimizers.SGD(lr=0.01)
sgd_conf = optimizers.serialize(sgd)
# Initialize Elephas Spark ML Estimator
estimator = ElephasEstimator()
estimator.set_keras_model_config(model.to_yaml())
estimator.set_optimizer_config(sgd_conf)
estimator.set_mode("synchronous")
estimator.set_loss("mean_absolute_error")
estimator.set_metrics(['mae'])
#estimator.setFeaturesCol("scaled_features")
#estimator.setLabelCol("index_category")
estimator.set_epochs(10)
estimator.set_batch_size(128)
estimator.set_num_workers(1)
estimator.set_verbosity(0)
estimator.set_validation_split(0.15)
estimator.set_categorical_labels(False)
# estimator.set_nb_classes(nb_classes)
# Fitting a model returns a Transformer
pipeline = Pipeline(stages=[estimator])
fitted_pipeline = pipeline.fit(train_data)
# Evaluate Spark model
prediction = fitted_pipeline.transform(train_data)
pnl = prediction.select("redshift", "prediction")
pnl.show(2)
Error I am getting
AnalysisException: cannot resolve '`features`' given input columns: [temp_table.assembled_inputs, temp_table.redshift]; line 1 pos 7;
'Project ['features AS features#736, 'label AS label#737]
+- SubqueryAlias temp_table
+- Sample 0.0, 0.8, false, 1234
+- Sort [assembled_inputs#434 ASC NULLS FIRST, redshift#48 ASC NULLS FIRST], false
+- Project [assembled_inputs#434, redshift#48]
+- Sort [_nondeterministic#537 ASC NULLS FIRST], true
+- Project [assembled_inputs#434, redshift#48, rand(9048580245342677621) AS _nondeterministic#537]
+- Project [assembled_inputs#434, redshift#48]
+- Project [modelMag_u#16, modelMag_g#17, modelMag_r#18, modelMag_i#19, modelMag_z#20, modelMag_ug#21, modelMag_gr#22, modelMag_ri#23, modelMag_iz#24, fiberMag_u#25, fiberMag_g#26, fiberMag_r#27, fiberMag_i#28, fiberMag_z#29, fiberMag_ug#30, fiberMag_gr#31, fiberMag_ri#32, fiberMag_iz#33, petroR50_rr#34, petroR90_zz#35, ri#36, iz#37, dered_u#38, dered_g#39, ... 10 more fields]
+- Relation[modelMag_u#16,modelMag_g#17,modelMag_r#18,modelMag_i#19,modelMag_z#20,modelMag_ug#21,modelMag_gr#22,modelMag_ri#23,modelMag_iz#24,fiberMag_u#25,fiberMag_g#26,fiberMag_r#27,fiberMag_i#28,fiberMag_z#29,fiberMag_ug#30,fiberMag_gr#31,fiberMag_ri#32,fiberMag_iz#33,petroR50_rr#34,petroR90_zz#35,ri#36,iz#37,dered_u#38,dered_g#39,... 9 more fields] csv
question from:
https://stackoverflow.com/questions/65840599/issue-facing-while-developing-regression-model-using-elephas-estimator-any-solu