I have been getting a "connection refused" error and I don't understand the reason.
Any ideas about what is wrong here?
My Code:
from pyspark.ml.tuning import ParamGridBuilder, CrossValidator
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from synapse.ml.lightgbm import LightGBMClassifier
from synapse.ml.train import ComputeModelStatistics
import mlflow
from pyspark.ml import Pipeline
# Hyperparameter tuning for a SynapseML LightGBM binary classifier using
# Spark CrossValidator, with the parameter grid, best model, best params,
# and evaluation metrics all logged to MLflow.
#
# NOTE(review): `final_train` and `final_test` must be Spark DataFrames
# (with "features" and "label" columns) defined earlier in the session —
# they are not created in this snippet; verify they exist before running.
#
# NOTE(review): the java.net.ConnectException ("Connection refused") in the
# traceback is raised during cv.fit(): SynapseML LightGBM opens sockets
# between executors to synchronize training, and a refused connection
# usually means an executor died (often OOM with large grids / bulk mode)
# or the cluster network blocks those ports — presumably an environment
# issue rather than a pure code bug; confirm executor logs. Reducing
# `parallelism`, or trying dataTransferMode="streaming" /
# useBarrierExecutionMode=True, are the usual mitigations.

mlflow.set_experiment("HyperparameterTuning")

with mlflow.start_run() as run:
    # Tag the run so it is identifiable as a tuning experiment in the UI.
    mlflow.set_tag("run_type", "hyperparameter_tuning")
    mlflow.set_tag("model_type", "LightGBM")
    mlflow.set_tag("experiment_purpose", "prediction_test")

    # Base estimator; grid values below override numIterations/learningRate/
    # numLeaves/isUnbalance per fold.
    lgbm = LightGBMClassifier(
        labelCol="label",
        featuresCol="features",
        featuresShapCol="shapValues",
        dataTransferMode="bulk",
        verbosity=1,
        boostingType="gbdt",
        maxBin=255,
        objective="binary",
    )

    # 3*3*3*2 = 54 parameter combinations, each trained numFolds times.
    paramGrid = ParamGridBuilder() \
        .addGrid(lgbm.numIterations, [50, 100, 200]) \
        .addGrid(lgbm.learningRate, [0.01, 0.05, 0.1]) \
        .addGrid(lgbm.numLeaves, [31, 64, 128]) \
        .addGrid(lgbm.isUnbalance, [True, False]) \
        .build()

    # Log the searched grid as a JSON artifact for reproducibility.
    mlflow.log_dict({"param_grid": [
        {"numIterations": [50, 100, 200]},
        {"learningRate": [0.01, 0.05, 0.1]},
        {"numLeaves": [31, 64, 128]},
        {"isUnbalance": [True, False]},
    ]}, "param_grid.json")

    # BUG FIX: `evaluator` was used below but never defined in the posted
    # code (the BinaryClassificationEvaluator import was unused), which
    # raises NameError. CrossValidator needs an evaluator to rank models.
    evaluator = BinaryClassificationEvaluator(
        labelCol="label",
        rawPredictionCol="rawPrediction",
        metricName="areaUnderROC",
    )

    # 3-fold cross-validation; `parallelism=2` trains two models at a time.
    cv = CrossValidator(
        estimator=lgbm,
        estimatorParamMaps=paramGrid,
        evaluator=evaluator,
        numFolds=3,
        parallelism=2,
    )

    # Fit across the grid — this is the stage where the posted
    # ConnectException is thrown (see NOTE above).
    cvModel = cv.fit(final_train)

    # Persist the winning model as an MLflow artifact.
    mlflow.spark.log_model(cvModel.bestModel, "lightgbm_best_model")

    # Log the best model's resolved hyperparameters.
    best_params = cvModel.bestModel.extractParamMap()
    best_params_dict = {param.name: best_params[param] for param in best_params}
    mlflow.log_params(best_params_dict)

    # Score the held-out test set with the best model.
    predictions = cvModel.bestModel.transform(final_test)

    # Compute accuracy/AUC/precision/recall + confusion matrix via SynapseML.
    metrics_df = ComputeModelStatistics(
        evaluationMetric="classification",
        labelCol="label",
        scoredLabelsCol="prediction",
        scoresCol="probability",
    ).transform(predictions)
    metrics = metrics_df.first().asDict()

    mlflow.log_metrics({
        "Accuracy": metrics["accuracy"],
        "AUC": metrics["AUC"],
        "Precision": metrics["precision"],
        "Recall": metrics["recall"],
    })

    # Confusion matrix is a Spark matrix; convert to nested lists for JSON.
    confusion_matrix = metrics["confusion_matrix"].toArray().tolist()
    mlflow.log_dict({"confusion_matrix": confusion_matrix}, "confusion_matrix.json")

    print("Hyperparameter Tuning Completed. Best Params and Metrics Logged to MLflow.")
Py4JJavaError: An error occurred while calling o37078.fit.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 10 in stage 36.0 failed 4 times, most recent failure: Lost task 10.3 in stage 36.0 (TID 3493):
java.net.ConnectException: Connection refused (Connection refused)
at java.net.PlainSocketImpl.socketConnect(Native Method)
at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:412)