from metagpt.tools.libs.data_preprocess import FillMissingValue, StandardScale, OneHotEncode
# Preprocessing pipeline for ``train_data``, which is defined before this
# section (presumably a pandas DataFrame -- it is used via ``.columns`` and
# ``.drop(columns=...)``; confirm against the code that loads it).
# Steps: impute -> drop 'Cabin' -> scale numeric columns -> one-hot encode
# categorical columns -> drop identifier-like columns.

# --- 1. Impute missing values -------------------------------------------
# 'Age' uses the 'median' strategy, 'Embarked' the 'most_frequent' strategy.
fill_age = FillMissingValue(features=['Age'], strategy='median')
fill_embarked = FillMissingValue(features=['Embarked'], strategy='most_frequent')
train_data = fill_age.fit_transform(train_data)
train_data = fill_embarked.fit_transform(train_data)

# --- 2. Drop 'Cabin' when present ---------------------------------------
# errors='ignore' turns a missing column into a no-op, which replaces the
# explicit membership check and matches the drop at the end of this section.
train_data.drop(columns=['Cabin'], inplace=True, errors='ignore')

# --- 3. Scale numeric features ------------------------------------------
numerical_features = ['Age', 'SibSp', 'Parch', 'Fare']
scale = StandardScale(features=numerical_features)
scaled_features = scale.fit_transform(train_data)
# Copy back only the scaled columns, one at a time, so every other column of
# train_data is left exactly as it was.
# NOTE(review): assumes fit_transform returns an object indexable by column
# name (e.g. a DataFrame) -- confirm against the StandardScale implementation.
for feature in numerical_features:
    train_data[feature] = scaled_features[feature]

# --- 4. One-hot encode categorical features -----------------------------
categorical_features = ['Sex', 'Embarked']
one_hot = OneHotEncode(features=categorical_features)
train_data = one_hot.fit_transform(train_data)

# --- 5. Drop columns listed as non-informative --------------------------
# `columns=` already selects the column axis, so no separate `axis` argument
# is needed; errors='ignore' skips any column that is already absent.
non_informative_columns = ['PassengerId', 'Name', 'Ticket']
train_data.drop(columns=non_informative_columns, inplace=True, errors='ignore')