"""Day 14 (homework 1 and 2): multi-objective optimisation with NSGA-II.

Credit: @浙大疏锦行

Outline of the script:

* Preprocessing: read the CSV, label-encode / one-hot-encode the categorical
  columns, fill missing values with the column mode, split train/test.
* Baseline: train a default random forest and record precision and recall
  as the comparison baseline.
* Random forest, multi-objective: an individual is 4 integer
  hyper-parameters; the fitness maximises precision and recall; operators
  are two-point crossover, uniform integer mutation and NSGA-II selection;
  the Pareto front is extracted after the evolution.
* XGBoost, multi-objective (homework 1): an individual is 5 mixed-type
  hyper-parameters; same fitness; a custom mutation handles the float
  genes; a default XGBoost model is trained for comparison.
* Visual comparison: both fronts and both default models on one figure.
* Chocolate recipe (homework 2): simulated taste/calorie data, random
  forest regressors as surrogate objectives, two ingredient ratios as
  decision variables (the three ratios sum to 1), fitness weights
  ``(1, -1)`` to maximise taste and minimise calories, bounded genetic
  operators, then the front and a few example recipes are shown.

Overall flow: preprocessing -> baseline -> hyper-parameter optimisation of
two models -> surrogate-model optimisation of a simulated scenario ->
visualisation.  The input file location is ``DATA_PATH`` below.
"""

import random
import time
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xgboost as xgb
from deap import algorithms, base, creator, tools
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split

warnings.filterwarnings('ignore')
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

DATA_PATH = r"C:\Python Study\Python60DaysChallenge-main\data.csv"

# The DEAP operators draw from the stdlib ``random`` module; seed it so the
# evolution is repeatable, in line with ``random_state=42`` used everywhere.
random.seed(42)


def define_creator_class(name, base_class, **attributes):
    """Create ``creator.<name>``, replacing any definition left from a re-run."""
    if name in vars(creator):
        delattr(creator, name)
    creator.create(name, base_class, **attributes)


def make_stats():
    """Return a Statistics object logging per-objective mean and max."""
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean, axis=0)
    stats.register("max", np.max, axis=0)
    return stats


def pareto_front(population):
    """Return the non-dominated individuals (first front) of *population*."""
    if not population:
        return []
    fronts = tools.sortNondominated(
        population, k=len(population), first_front_only=True
    )
    return fronts[0]


def precision_recall(y_true, y_pred):
    """Return ``(precision, recall)``; a class never predicted scores 0."""
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    return precision, recall


# ---------------------------------------------------------------------------
# 0. Data loading and preprocessing
# ---------------------------------------------------------------------------
data = pd.read_csv(DATA_PATH)
# Remember the raw columns so the one-hot columns can be identified later
# without reading the file a second time.
original_columns = set(data.columns)

# Label encoding
home_ownership_mapping = {
    'Own Home': 1,
    'Rent': 2,
    'Have Mortgage': 3,
    'Home Mortgage': 4,
}
data['Home Ownership'] = data['Home Ownership'].map(home_ownership_mapping)

# NOTE(review): the '< 1 year' and '10+ years' keys are restored from a paste
# that had lost '<' and '+'; confirm against the values in the CSV.
years_in_job_mapping = {
    '< 1 year': 1,
    '1 year': 2,
    '2 years': 3,
    '3 years': 4,
    '4 years': 5,
    '5 years': 6,
    '6 years': 7,
    '7 years': 8,
    '8 years': 9,
    '9 years': 10,
    '10+ years': 11,
}
data['Years in current job'] = data['Years in current job'].map(years_in_job_mapping)

# One-hot encode Purpose and store the new indicator columns as integers.
data = pd.get_dummies(data, columns=['Purpose'])
new_cols = [col for col in data.columns if col not in original_columns]
for col in new_cols:
    data[col] = data[col].astype(int)

# Term mapping
term_mapping = {'Short Term': 0, 'Long Term': 1}
data['Term'] = data['Term'].map(term_mapping)
data = data.rename(columns={'Term': 'Long Term'})

# Fill missing values of the numeric columns with the column mode.
continuous_features = data.select_dtypes(include=['int64', 'float64']).columns.tolist()
for feature in continuous_features:
    mode_val = data[feature].mode()[0]
    # Assign back instead of a chained ``inplace=True`` call, which may act
    # on a temporary copy.
    data[feature] = data[feature].fillna(mode_val)

# Train/test split
X = data.drop(['Credit Default'], axis=1)
y = data['Credit Default']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ---------------------------------------------------------------------------
# 1. Baseline model: default random forest
# ---------------------------------------------------------------------------
print("=" * 60)
print("1. 默认随机森林基准模型")
print("=" * 60)
rf_default = RandomForestClassifier(random_state=42)
rf_default.fit(X_train, y_train)
rf_pred = rf_default.predict(X_test)
print(classification_report(y_test, rf_pred))
print("混淆矩阵")
print(confusion_matrix(y_test, rf_pred))
base_precision, base_recall = precision_recall(y_test, rf_pred)

# ---------------------------------------------------------------------------
# 2. Multi-objective optimisation of the random forest (NSGA-II)
# ---------------------------------------------------------------------------
define_creator_class("FitnessMulti", base.Fitness, weights=(1.0, 1.0))
define_creator_class("Individual", list, fitness=creator.FitnessMulti)

toolbox = base.Toolbox()
toolbox.register("attr_n_estimators", random.randint, 50, 300)
toolbox.register("attr_max_depth", random.randint, 5, 50)
toolbox.register("attr_min_samples_split", random.randint, 2, 20)
toolbox.register("attr_min_samples_leaf", random.randint, 1, 20)
toolbox.register(
    "individual",
    tools.initCycle,
    creator.Individual,
    (
        toolbox.attr_n_estimators,
        toolbox.attr_max_depth,
        toolbox.attr_min_samples_split,
        toolbox.attr_min_samples_leaf,
    ),
    n=1,
)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)


def evaluate_rf(individual):
    """Fit a random forest with the individual's genes.

    Args:
        individual: ``[n_estimators, max_depth, min_samples_split,
            min_samples_leaf]``.

    Returns:
        ``(precision, recall)`` measured on ``X_test`` / ``y_test``.
    """
    # NOTE(review): the fitness is measured on the test split, so the reported
    # front is optimistic; consider a separate validation split.
    n_est, max_d, min_split, min_leaf = individual
    model = RandomForestClassifier(
        n_estimators=n_est,
        max_depth=max_d,
        min_samples_split=min_split,
        min_samples_leaf=min_leaf,
        random_state=42,
        n_jobs=-1,
    )
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    return precision_recall(y_test, preds)


toolbox.register("evaluate", evaluate_rf)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register(
    "mutate",
    tools.mutUniformInt,
    low=[50, 5, 2, 1],
    up=[300, 50, 20, 20],
    indpb=0.2,
)
toolbox.register("select", tools.selNSGA2)

print("\n" + "=" * 60)
print("2. NSGA-II 优化随机森林")
print("=" * 60)
stats = make_stats()
pop = toolbox.population(n=50)
final_pop_rf, logbook_rf = algorithms.eaMuPlusLambda(
    pop,
    toolbox,
    mu=50,
    lambda_=50,
    cxpb=0.7,
    mutpb=0.2,
    ngen=20,
    stats=stats,
    verbose=True,
)
# Keep only the non-dominated individuals; selBest would merely sort the
# whole population.
pareto_rf = pareto_front(final_pop_rf)
points_rf = np.array([ind.fitness.values for ind in pareto_rf])

# ---------------------------------------------------------------------------
# 3. Homework 1: multi-objective optimisation of XGBoost
# ---------------------------------------------------------------------------
# Redefine the creator classes cleanly to avoid duplicate-definition issues.
define_creator_class("FitnessMulti", base.Fitness, weights=(1.0, 1.0))
define_creator_class("Individual", list, fitness=creator.FitnessMulti)

toolbox_xgb = base.Toolbox()
toolbox_xgb.register("attr_n_estimators", random.randint, 50, 300)
toolbox_xgb.register("attr_max_depth", random.randint, 3, 15)
toolbox_xgb.register("attr_learning_rate", lambda: round(random.uniform(0.01, 0.3), 4))
toolbox_xgb.register("attr_subsample", lambda: round(random.uniform(0.6, 1.0), 4))
toolbox_xgb.register("attr_colsample_bytree", lambda: round(random.uniform(0.6, 1.0), 4))
toolbox_xgb.register(
    "individual",
    tools.initCycle,
    creator.Individual,
    (
        toolbox_xgb.attr_n_estimators,
        toolbox_xgb.attr_max_depth,
        toolbox_xgb.attr_learning_rate,
        toolbox_xgb.attr_subsample,
        toolbox_xgb.attr_colsample_bytree,
    ),
    n=1,
)
toolbox_xgb.register("population", tools.initRepeat, list, toolbox_xgb.individual)


def evaluate_xgb(individual):
    """Fit an XGBoost classifier with the individual's genes.

    Args:
        individual: ``[n_estimators, max_depth, learning_rate, subsample,
            colsample_bytree]``.

    Returns:
        ``(precision, recall)`` measured on ``X_test`` / ``y_test``.
    """
    n_est, max_d, lr, subs, colsample = individual
    model = xgb.XGBClassifier(
        n_estimators=n_est,
        max_depth=max_d,
        learning_rate=lr,
        subsample=subs,
        colsample_bytree=colsample,
        random_state=42,
        eval_metric='logloss',
        verbosity=0,
    )
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    return precision_recall(y_test, preds)


def custom_mutate_xgb(individual, indpb=0.2):
    """Resample each gene within its bounds with probability *indpb*.

    Genes whose lower bound is an ``int`` are redrawn as integers, the others
    as floats rounded to 4 decimals.  The individual is modified in place.

    Returns:
        A one-element tuple holding the individual, as DEAP expects.
    """
    bounds = [(50, 300), (3, 15), (0.01, 0.3), (0.6, 1.0), (0.6, 1.0)]
    for i, (low, up) in enumerate(bounds):
        if random.random() < indpb:
            if isinstance(low, int):
                individual[i] = random.randint(low, up)
            else:
                individual[i] = round(random.uniform(low, up), 4)
    return individual,


toolbox_xgb.register("evaluate", evaluate_xgb)
toolbox_xgb.register("mate", tools.cxTwoPoint)
toolbox_xgb.register("mutate", custom_mutate_xgb, indpb=0.2)
toolbox_xgb.register("select", tools.selNSGA2)

print("\n" + "=" * 60)
print("3. 作业一NSGA-II 优化 XGBoost")
print("=" * 60)
pop_xgb = toolbox_xgb.population(n=40)
stats_xgb = make_stats()
final_pop_xgb, logbook_xgb = algorithms.eaMuPlusLambda(
    pop_xgb,
    toolbox_xgb,
    mu=40,
    lambda_=40,
    cxpb=0.7,
    mutpb=0.3,
    ngen=15,
    stats=stats_xgb,
    verbose=True,
)
pareto_xgb = pareto_front(final_pop_xgb)
points_xgb = np.array([ind.fitness.values for ind in pareto_xgb])

# Default XGBoost baseline
xgb_default = xgb.XGBClassifier(random_state=42, eval_metric='logloss')
xgb_default.fit(X_train, y_train)
xgb_pred = xgb_default.predict(X_test)
base_p_xgb, base_r_xgb = precision_recall(y_test, xgb_pred)

# ---------------------------------------------------------------------------
# 4. Visualisation: random forest vs XGBoost Pareto fronts
# ---------------------------------------------------------------------------
plt.figure(figsize=(12, 9))
plt.scatter(
    points_rf[:, 1], points_rf[:, 0],
    facecolors='none', edgecolors='blue', s=80, label='随机森林帕累托前沿',
)
plt.scatter(
    points_xgb[:, 1], points_xgb[:, 0],
    facecolors='none', edgecolors='green', s=80, label='XGBoost 帕累托前沿',
)
plt.scatter(
    base_recall, base_precision, marker='s', color='red', s=150,
    label=f'随机森林默认 (R={base_recall:.3f}, P={base_precision:.3f})',
)
plt.scatter(
    base_r_xgb, base_p_xgb, marker='D', color='orange', s=150,
    label=f'XGBoost 默认 (R={base_r_xgb:.3f}, P={base_p_xgb:.3f})',
)
plt.xlabel('召回率 (Recall)', fontsize=12)
plt.ylabel('精确率 (Precision)', fontsize=12)
plt.title('多目标优化结果对比', fontsize=16)
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)
plt.show()

# ---------------------------------------------------------------------------
# 5. Homework 2: chocolate recipe multi-objective optimisation (simulated)
# ---------------------------------------------------------------------------
print("\n" + "=" * 60)
print("5. 作业二巧克力配方多目标优化")
print("=" * 60)

# Clipping limits of the simulated targets; the worst ends double as the
# penalty fitness for infeasible recipes.
TASTE_RANGE = (0, 100)
CALORIE_RANGE = (100, 500)
INFEASIBLE_FITNESS = (float(TASTE_RANGE[0]), float(CALORIE_RANGE[1]))

# 5.1 Generate simulated data
np.random.seed(42)
n_samples = 500
x = np.random.dirichlet(np.ones(3), n_samples)
taste = (
    70 * x[:, 0] ** 0.5
    + 80 * x[:, 1]
    + 30 * x[:, 2] ** 0.3
    - 20 * x[:, 2] ** 2
)
taste = taste + np.random.normal(0, 3, n_samples)
taste = np.clip(taste, *TASTE_RANGE)
calories = 200 * x[:, 0] + 400 * x[:, 1] + 350 * x[:, 2]
calories = calories + np.random.normal(0, 10, n_samples)
calories = np.clip(calories, *CALORIE_RANGE)
df = pd.DataFrame(x, columns=['cocoa', 'sugar', 'milk'])
df['taste'] = taste
df['calories'] = calories

# 5.2 Train the surrogate models
# Fit on plain arrays so that prediction on plain arrays is consistent.
X_recipe = df[['cocoa', 'sugar', 'milk']].to_numpy()
reg_taste = RandomForestRegressor(n_estimators=100, random_state=42)
reg_taste.fit(X_recipe, df['taste'])
reg_cal = RandomForestRegressor(n_estimators=100, random_state=42)
reg_cal.fit(X_recipe, df['calories'])

# 5.3 Define the optimisation problem: maximise taste, minimise calories.
define_creator_class("FitnessMinMax", base.Fitness, weights=(1.0, -1.0))
define_creator_class("Individual", list, fitness=creator.FitnessMinMax)
toolbox_recipe = base.Toolbox()


def create_individual_recipe():
    """Sample ``[cocoa, sugar]`` uniformly, rejecting pairs that exceed 1."""
    while True:
        ind = creator.Individual([random.uniform(0, 1), random.uniform(0, 1)])
        if ind[0] + ind[1] <= 1:
            return ind


toolbox_recipe.register("individual", create_individual_recipe)
toolbox_recipe.register("population", tools.initRepeat, list, toolbox_recipe.individual)


def evaluate_recipe(individual):
    """Score a recipe with the surrogate models.

    Args:
        individual: ``[cocoa, sugar]``; milk is the remainder ``1 - sum``.

    Returns:
        ``(predicted taste, predicted calories)``, or ``INFEASIBLE_FITNESS``
        when the remainder is negative.
    """
    x1, x2 = individual
    x3 = 1 - x1 - x2
    # The bounded operators only keep each gene inside [0, 1]; they can still
    # push the sum above 1, so such offspring get the worst-case fitness.
    if x3 < 0:
        return INFEASIBLE_FITNESS
    feat = np.array([[x1, x2, x3]])
    taste_pred = reg_taste.predict(feat)[0]
    cal_pred = reg_cal.predict(feat)[0]
    return (taste_pred, cal_pred)


toolbox_recipe.register("evaluate", evaluate_recipe)
toolbox_recipe.register(
    "mate", tools.cxSimulatedBinaryBounded, low=[0, 0], up=[1, 1], eta=20.0
)
toolbox_recipe.register(
    "mutate", tools.mutPolynomialBounded, low=[0, 0], up=[1, 1], eta=20.0, indpb=0.2
)
toolbox_recipe.register("select", tools.selNSGA2)

pop_recipe = toolbox_recipe.population(n=100)
stats_recipe = make_stats()
final_pop_recipe, logbook_recipe = algorithms.eaMuPlusLambda(
    pop_recipe,
    toolbox_recipe,
    mu=100,
    lambda_=100,
    cxpb=0.7,
    mutpb=0.3,
    ngen=50,
    stats=stats_recipe,
    verbose=True,
)
# Order the front from tastiest to blandest so the printed examples start
# with the highest taste scores.
pareto_recipe = sorted(
    pareto_front(final_pop_recipe),
    key=lambda ind: ind.fitness.values[0],
    reverse=True,
)
tastes_pareto = [ind.fitness.values[0] for ind in pareto_recipe]
cals_pareto = [ind.fitness.values[1] for ind in pareto_recipe]

# 5.4 Visualisation
plt.figure(figsize=(10, 7))
plt.scatter(cals_pareto, tastes_pareto, facecolors='none', edgecolors='green', s=60)
plt.xlabel('卡路里 (kcal/100g) ← 越小越好', fontsize=12)
plt.ylabel('口味评分 → 越大越好', fontsize=12)
plt.title('巧克力配方的帕累托前沿口味 vs 卡路里', fontsize=14)
plt.grid(True, linestyle='--', alpha=0.6)
plt.gca().invert_xaxis()
plt.show()

# Print a few of the best recipes
print("\n示例帕累托解可可粉, 糖, 牛奶粉 | 口味, 卡路里")
for i, ind in enumerate(pareto_recipe[:5], start=1):
    x3 = 1 - ind[0] - ind[1]
    print(
        f"配方{i}: 可可={ind[0]:.2f}, 糖={ind[1]:.2f}, 牛奶={x3:.2f} | "
        f"口味={ind.fitness.values[0]:.1f}, 卡路里={ind.fitness.values[1]:.1f}"
    )

print("\n全部代码执行完毕")