python – 在hyperopt中设置条件搜索空间的问题
发布时间:2020-12-20 13:17:37 所属栏目:Python 来源:网络整理
导读:我承认我可能把这里的条件搜索空间设置错了,但不知为何我就是无法让它运行起来。我想用 hyperopt 来调优一个逻辑回归模型,而根据所选的求解器(solver),还有其他一些参数需要探索。如果选择 liblinear 求解器,就可以选择惩罚项(penalty);根据惩罚项,还可以选择 dual。当我尝试在这个……
我承认我可能把这里的条件搜索空间设置错了,但不知为何我就是无法让它运行起来。我想用 hyperopt 来调优一个逻辑回归模型,而根据所选的求解器(solver),还有其他一些参数需要探索:如果选择 liblinear 求解器,就可以选择惩罚项(penalty);根据惩罚项,还可以选择 dual。当我在这个搜索空间上运行 hyperopt 时,它不断报错,因为它把整个字典原样传给了模型,如下所示。有什么想法吗?我得到的错误是 `ValueError: Logistic Regression supports only liblinear, newton-cg, lbfgs and sag solvers, got {'solver': 'sag'}`。同样的写法在设置随机森林的搜索空间时是可行的,所以我不知所措。
import numpy as np
import scipy as sp
import pandas as pd
pd.options.display.max_columns = None
pd.options.display.max_rows = None
import matplotlib.pyplot as plt
# IPython magic `%matplotlib inline`, spelled as plain Python (notebook only).
get_ipython().run_line_magic('matplotlib', 'inline')
import seaborn as sns
sns.set(style="white")
import pyodbc
import statsmodels as sm
from pandasql import sqldf
import math
from tqdm import tqdm
import pickle
from sklearn.preprocessing import RobustScaler, OneHotEncoder, MinMaxScaler
from sklearn.utils import shuffle
from sklearn.cross_validation import KFold, StratifiedKFold, cross_val_score, cross_val_predict, train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold as StratifiedKFoldIt
#from sklearn.grid_search import GridSearchCV,RandomizedSearchCV
from sklearn.feature_selection import RFECV, VarianceThreshold, SelectFromModel, SelectKBest
from sklearn.decomposition import PCA, IncrementalPCA, FactorAnalysis
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, AdaBoostClassifier, BaggingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV, SGDClassifier
from sklearn.metrics import precision_recall_curve, precision_score, recall_score, accuracy_score, classification_report, confusion_matrix, f1_score, log_loss
from imblearn.over_sampling import RandomOverSampler, SMOTE, ADASYN
from imblearn.under_sampling import RandomUnderSampler, ClusterCentroids, NearMiss, NeighbourhoodCleaningRule, OneSidedSelection
#import lightgbm as lgbm
from xgboost.sklearn import XGBClassifier
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

# Conditional search space for LogisticRegression: `penalty` is only explored
# for the liblinear solver, and `dual` only for the l2 penalty.
# Each hp.choice branch is a dict, so a sampled point looks like
#   {'solver': {'solver': 'liblinear', 'penalty': {'penalty': 'l2', 'dual': True}}, ...}
# and must be flattened (see flatten_params) before it reaches the estimator.
space4lr = {
    'C': hp.uniform('C', .0001, 100.0),
    'solver': hp.choice('solver', [
        {'solver': 'newton-cg'},
        {'solver': 'lbfgs'},
        {'solver': 'sag'},
        {'solver': 'liblinear',
         'penalty': hp.choice('penalty', [
             {'penalty': 'l1'},
             {'penalty': 'l2', 'dual': hp.choice('dual', [True, False])},
         ])},
    ]),
    # Real booleans: the strings 'True'/'False' are both truthy.
    'fit_intercept': hp.choice('fit_intercept', [True, False]),
    'class_weight': hp.choice('class_weight', ['balanced', None]),
    'max_iter': 50000,
    'random_state': 84,
    'n_jobs': 8,
}


def flatten_params(params):
    """Return a flat keyword dict from a sampled point of the search space.

    Any value that is itself a dict (a chosen conditional branch) is merged
    into the parent level, recursively, so that
    ``{'solver': {'solver': 'sag'}}`` becomes ``{'solver': 'sag'}``.
    The input dict is not modified.
    """
    flat = {}
    for key, value in params.items():
        if isinstance(value, dict):
            flat.update(flatten_params(value))
        else:
            flat[key] = value
    return flat


def hyperopt_train_test(params):
    """Cross-validate a LogisticRegression built from one sampled point.

    Reads the module-level ``X`` (array), ``y`` (Series), ``ros`` and ``rus``
    prepared by the per-feature-set loop below.

    Returns a list of fold averages, in this order:
    [tuning metric ((4 * precision + recall) / 5), accuracy, precision,
    recall, f1, log loss].
    """
    # Passing the nested dict straight through made `solver` a dict and raised
    # "ValueError: Logistic Regression supports only ... solvers, got {...}".
    clf = LogisticRegression(**flatten_params(params))
    #cvs = cross_val_score(xgbc,X,y,scoring='recall',cv=skf).mean()
    skf = StratifiedKFold(y, n_folds=6, shuffle=False, random_state=1)
    tuning_met = []
    accuracy = []
    precision = []
    recall = []
    f1 = []
    log = []
    for train, test in skf:
        X_train = X[train]
        X_test = X[test]
        # The fold indices are positional. `y` keeps the shuffled frame's
        # index labels, so plain `y[train]` would pick different rows than
        # `X[train]`; `.iloc` keeps features and labels aligned.
        y_train = y.iloc[train]
        y_test = y.iloc[test]
        X_train, y_train = ros.fit_sample(X_train, y_train)
        X_train, y_train = rus.fit_sample(X_train, y_train)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        fold_precision = precision_score(y_test, y_pred)
        fold_recall = recall_score(y_test, y_pred)
        #tuning_met.append(precision_score(y_test,y_pred))
        tuning_met.append(((fold_precision * 4) + fold_recall) / 5)
        accuracy.append(accuracy_score(y_test, y_pred))
        precision.append(fold_precision)
        recall.append(fold_recall)
        f1.append(f1_score(y_test, y_pred))
        # NOTE(review): log loss is computed on hard class predictions here,
        # not on predict_proba output; kept as in the original.
        log.append(log_loss(y_test, y_pred))
    metrics = [
        sum(scores) / len(scores)
        for scores in (tuning_met, accuracy, precision, recall, f1, log)
    ]
    return metrics


def f(params):
    """hyperopt objective: score one sampled point and log the outcome.

    Appends the metrics, the raw sampled point and the feature-set label to
    the global ``results`` frame, prints progress, and returns the negated
    tuning metric as the loss (fmin minimises).
    """
    global best, count, results, lab, met
    met = hyperopt_train_test(params.copy())
    met.append(params)
    met.append(featureset_labels[lab])
    acc = met[0]
    results = results.append([met])
    if acc > best:
        print(featureset_labels[lab], 'new best:', acc, 'Accuracy:', met[1],
              'Precision:', met[2], 'Recall:', met[3], 'using', params, """ """)
        best = acc
        #if results.empty is False & results.count() >= lab:
        #    results.drop(results.index[lab])
        #results = results.append([met])
    else:
        print(acc, featureset_labels[lab], count)
    count = count + 1
    return {'loss': -acc, 'status': STATUS_OK}


lab = 0
results = pd.DataFrame()

# One tuning run per fitted feature selector in `feature_elims`.
for i in feature_elims:
    target = 'Binary_over_3'
    alt_targets = ['year2_PER', 'year2_GP', 'year2_Min', 'year2_EFF', 'year2_WS/40',
                   'year2_Pts/Poss', 'Round', 'GRZ_Pick', 'GRZ_Player_Rating',
                   'Binary_over_2', 'Binary_over_3', 'Binary_over_4', 'Binary_5',
                   'Draft_Strength']
    #alt_targets.remove(target)
    nondata_columns = ['display_name', 'player_global_id', 'season', 'season_',
                       'team_global_id', 'birth_date', 'Draft_Day']
    nondata_columns.extend(alt_targets)

    AGG_SET_CART_PERC = sqldf(
        "SELECT * FROM AGG_SET_PLAYED_ADJ_SOS_Jan1 t1 "
        "LEFT JOIN RANKINGS t2 ON t1.[player_global_id] = t2.[player_global_id] "
        "LEFT JOIN Phys_Training t3 ON t1.[player_global_id] = t3.[player_global_id]"
    )
    AGG_SET_CART_PERC['HS_RSCI'] = AGG_SET_CART_PERC['HS_RSCI'].fillna(110)
    AGG_SET_CART_PERC['HS_Avg_Rank'] = AGG_SET_CART_PERC['HS_Avg_Rank'].fillna(1)
    AGG_SET_CART_PERC['HS_years_ranked'] = AGG_SET_CART_PERC['HS_years_ranked'].fillna(0)
    AGG_SET_CART_PERC = shuffle(AGG_SET_CART_PERC, random_state=8675309)

    rus = RandomUnderSampler(random_state=8675309)
    ros = RandomOverSampler(random_state=8675309)
    rs = RobustScaler()

    X = AGG_SET_CART_PERC
    y = X[target]
    X = pd.DataFrame(X.drop(nondata_columns, axis=1))

    # Fold the hybrid positions into their two base positions. Done
    # column-wise because writes to rows yielded by iterrows() are not
    # guaranteed to reach the frame.
    position = pd.get_dummies(X['position'])
    position['F'] = position['F'] | position['F/C'] | position['G/F']
    position['C'] = position['C'] | position['F/C']
    position['G'] = position['G'] | position['G/F']
    position = position.drop(['F/C', 'G/F'], axis=1)

    X = pd.concat([X, position], axis=1).drop(['position'], axis=1)
    X = rs.fit_transform(X, y=None)
    X = i.transform(X)

    best = 0
    count = 0
    trials = Trials()
    # Kept separate from `best`, the running best score used by f().
    # fmin reports hp.choice selections as list indices, not values;
    # hyperopt.space_eval(space4lr, best_params) decodes them.
    best_params = fmin(f, space4lr, algo=tpe.suggest, max_evals=1000, trials=trials)
    print(featureset_labels[lab], ' best:')
    print(best_params, """ """)
    lab = lab + 1

# 解决方法
虽然回复得有些晚,但我昨天也遇到了同样的问题。我认为下面的代码片段会对你有所帮助。
# Search space in which every valid (penalty, solver, multi_class) combination
# is its own hp.choice group, so incompatible combinations are never sampled.
# Labels are unique per group, as hyperopt requires.
# NOTE(review): three option lists were lost in the source text and are
# reconstructed below (marked "reconstructed"); confirm against the original.
space = hp.choice('classifier', [
    {
        'model': LogisticRegression,
        'param': {
            'hyper_param_groups': hp.choice('hyper_param_groups', [
                {
                    'penalty': hp.choice('penalty_block1', ['l2']),
                    'solver': hp.choice('solver_block1', ['newton-cg', 'sag', 'saga', 'lbfgs']),
                    'multi_class': hp.choice('multi_class', ['ovr', 'multinomial']),
                },
                {
                    'penalty': hp.choice('penalty_block2', ['l2']),  # reconstructed
                    'solver': hp.choice('solver_block2', ['liblinear']),
                    'multi_class': hp.choice('multi_class_block2', ['ovr']),
                },
                {
                    'penalty': hp.choice('penalty_block3', ['l1']),
                    'solver': hp.choice('solver_block3', ['saga']),
                    'multi_class': hp.choice('multi_class_block3', ['ovr', 'multinomial']),  # reconstructed
                },
            ]),
            'dual': hp.choice('dual', [False]),
            'class_weight': hp.choice('class_weight', ['balanced', None]),  # reconstructed
            'random_state': hp.choice('random_state', [10, 267]),
            'max_iter': hp.choice('max_iter', [100, 500]),
            'verbose': hp.choice('verbose', [0]),
        },
    },
])

# 以及如何在可调用方法中使用它:
# `args` is the sampled point that fmin passes to the objective callable; the
# grouped values sit one level deeper than the ungrouped ones.
penalty = args['param']['hyper_param_groups']['penalty']
solver = args['param']['hyper_param_groups']['solver']
multi_class = args['param']['hyper_param_groups']['multi_class']
dual = args['param']['dual']
class_weight = args['param']['class_weight']
random_state = args['param']['random_state']
max_iter = args['param']['max_iter']
verbose = args['param']['verbose']

# (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |
相关内容
- 如何方便地使用Python和Pandas来匿名信息!保护客户的敏感信
- 如何通过python API将json / pickle文件转储并读入Google D
- 【python-opencv】图像平滑
- Python编程中time模块的一些关键用法解析
- 02 . Python之数据类型
- python – 无法在openERP中导入自定义模块
- 在Python中有效地使用HTMLParser
- python – pip freeze显示appdirs,包装,pyparsing,六个安
- python机器学习理论与实战(一)K近邻法
- python – Django Rest Framework:当Serializer many = Fa