python – ipyparallel Hub连接超时与scikit中的并行CV学习
发布时间:2020-12-20 13:10:00 所属栏目:Python 来源:网络整理
导读:我正在尝试使用ipyparallel训练多个RandomForest分类器.我的设计是嵌套CV循环外部CV用于删除方差,内部GridSearchCV内置(n_jobs = -1)以找到RandomForest的最佳估计器(#forest,林中的#trees)并训练每个组合(拟合函数)与ipyparallel与sge(训练森林是最内在的一
我正在尝试使用ipyparallel训练多个RandomForest分类器。我的设计是一个嵌套的CV循环:外层CV用于消除方差;内层使用sklearn内置的GridSearchCV(n_jobs = -1)为RandomForest寻找最佳估计器参数(森林的数量、每个森林中树的数量);每个参数组合的训练(fit函数)则通过ipyparallel配合sge来完成(训练森林是最内层的一步)。
如果我使用n_jobs = 1运行GridSearchCV,一切运行正常,但速度很慢。下面附上代码和错误信息。 CV循环: kf = cross_validation.StratifiedKFold(Y,n_folds=k,shuffle=shuffle) for train_index,test_index in kf: X_train,X_test = X[train_index],X[test_index] Y_train,Y_test = Y[train_index],Y[test_index] rusCV = GridSearchCV(RUSRandomForestClassifier(n_Forests=100,n_TreesInForest=300),tune_params,cv=20,n_jobs = -1) # do work ipyparallel: c = Client(profile='sge',timeout=300) view = c.direct_view() forests = view.map_async(__trainForest,dataTupleList) forests.get() c.purge_everything() c.close(linger=True) 错误: multiprocessing.pool.RemoteTraceback: """ Traceback (most recent call last): File "/home/sulantha/anaconda3/lib/python3.4/site-packages/sklearn/externals/joblib/parallel.py",line 130,in __call__ return self.func(*args,**kwargs) File "/home/sulantha/anaconda3/lib/python3.4/site-packages/sklearn/externals/joblib/parallel.py",line 72,in __call__ return [func(*args,**kwargs) for func,args,kwargs in self.items] File "/home/sulantha/anaconda3/lib/python3.4/site-packages/sklearn/externals/joblib/parallel.py",in <listcomp> return [func(*args,kwargs in self.items] File "/home/sulantha/anaconda3/lib/python3.4/site-packages/sklearn/cross_validation.py",line 1531,in _fit_and_score estimator.fit(X_train,y_train,**fit_params) File "/home/sulantha/PycharmProjects/TwoStepArticle/Python/RUSRandomForest/RUSRandomForestClassifier.py",line 43,in fit self.trainJungle(X,Y) File "/home/sulantha/PycharmProjects/TwoStepArticle/Python/RUSRandomForest/RUSRandomForestClassifier.py",line 46,in trainJungle c = Client(url_file='/home/sulantha/.ipython/profile_sge/security/ipcontroller-client.json',profile='sge',profile_dir='/home/sulantha/.ipython/profile_sge/',timeout=300) File "/home/sulantha/anaconda3/lib/python3.4/site-packages/ipyparallel/client/client.py",line 478,in __init__ self._connect(sshserver,ssh_kwargs,timeout) File "/home/sulantha/anaconda3/lib/python3.4/site-packages/ipyparallel/client/client.py",line 598,in _connect raise 
error.TimeoutError("Hub connection request timed out") ipyparallel.error.TimeoutError: Hub connection request timed out During handling of the above exception,another exception occurred: Traceback (most recent call last): File "/home/sulantha/anaconda3/lib/python3.4/multiprocessing/pool.py",line 119,in worker result = (True,func(*args,**kwds)) File "/home/sulantha/anaconda3/lib/python3.4/site-packages/sklearn/externals/joblib/parallel.py",line 143,in __call__ raise TransportableException(text,e_type) sklearn.externals.joblib.my_exceptions.TransportableException: TransportableException ___________________________________________________________________________ TimeoutError Mon Feb 15 13:29:45 2016 PID: 30122 Python 3.4.4: /home/sulantha/anaconda3/bin/python ........................................................................... /home/sulantha/PycharmProjects/TwoStepArticle/Python/RUSRandomForest/RUSRandomForestClassifier.py in fit(self=RUSRandomForestClassifier(n_Forests=10,n_TreesInForest=50),X=array([[ 66.14246575,2.,1. ...1.63758022,1.44694768,1.36380531]]),Y=array([1,...0,1,0])) 38 return finalRUSX,finalRUSY 39 40 41 42 def fit(self,X,Y): ---> 43 self.trainJungle(X,Y) self.trainJungle = <bound method RUSRandomForestClassifier.trainJun...restClassifier(n_Forests=10,n_TreesInForest=50)> X = array([[ 66.14246575,1.36380531]]) Y = array([1,0]) 44 45 def trainJungle(self,Y): 46 c = Client(url_file='/home/sulantha/.ipython/profile_sge/security/ipcontroller-client.json',timeout=300) 47 view = c.direct_view() ........................................................................... 
/home/sulantha/PycharmProjects/TwoStepArticle/Python/RUSRandomForest/RUSRandomForestClassifier.py in trainJungle(self=RUSRandomForestClassifier(n_Forests=10,0])) 41 42 def fit(self,Y): 43 self.trainJungle(X,Y) 44 45 def trainJungle(self,Y): ---> 46 c = Client(url_file='/home/sulantha/.ipython/profile_sge/security/ipcontroller-client.json',timeout=300) c = undefined 47 view = c.direct_view() 48 def __trainForest(xyTuple): 49 (X,Y) = xyTuple 50 rf = RandomForestClassifier(n_estimators=self.n_TreesInForest,verbose=0,class_weight='auto') ........................................................................... /home/sulantha/anaconda3/lib/python3.4/site-packages/ipyparallel/client/client.py in __init__(self=<ipyparallel.client.client.Client object>,url_file='/home/sulantha/.ipython/profile_sge/security/ipcontroller-client.json',ipython_dir=None,context=<zmq.sugar.context.Context object>,debug=False,sshserver='',sshkey=None,password=None,paramiko=None,timeout=300,cluster_id=None,**extra_args={'key': b'3bf859ee-882a-4929-882a-be1edaa9e0c9','packer': 'json','signature_scheme': 'hmac-sha256','unpacker': 'json'}) 473 } 474 self._queue_handlers = {'execute_reply' : self._handle_execute_reply,475 'apply_reply' : self._handle_apply_reply} 476 477 try: --> 478 self._connect(sshserver,timeout) self._connect = <bound method Client._connect of <ipyparallel.client.client.Client object>> sshserver = '' ssh_kwargs = {'keyfile': None,'paramiko': None,'password': None} timeout = 300 479 except: 480 self.close(linger=0) 481 raise 482 ........................................................................... 
/home/sulantha/anaconda3/lib/python3.4/site-packages/ipyparallel/client/client.py in _connect(self=<ipyparallel.client.client.Client object>,ssh_kwargs={'keyfile': None,'password': None},timeout=300) 593 poller = zmq.Poller() 594 poller.register(self._query_socket,zmq.POLLIN) 595 # poll expects milliseconds,timeout is seconds 596 evts = poller.poll(timeout*1000) 597 if not evts: --> 598 raise error.TimeoutError("Hub connection request timed out") 599 idents,msg = self.session.recv(self._query_socket,mode=0) 600 if self.debug: 601 pprint(msg) 602 content = msg['content'] TimeoutError: Hub connection request timed out ___________________________________________________________________________ """ The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/home/sulantha/anaconda3/lib/python3.4/site-packages/sklearn/externals/joblib/parallel.py",line 731,in retrieve self._output.extend(job.get()) File "/home/sulantha/anaconda3/lib/python3.4/multiprocessing/pool.py",line 608,in get raise self._value sklearn.externals.joblib.my_exceptions.TransportableException: TransportableException ___________________________________________________________________________ TimeoutError Mon Feb 15 13:29:45 2016 PID: 30122 Python 3.4.4: /home/sulantha/anaconda3/bin/python ........................................................................... /home/sulantha/anaconda3/lib/python3.4/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>) 67 def __init__(self,iterator_slice): 68 self.items = list(iterator_slice) 69 self._size = len(self.items) 70 71 def __call__(self): ---> 72 return [func(*args,kwargs in self.items] self.items = [(<function _fit_and_score>,(RUSRandomForestClassifier(n_Forests=10,array([[ 82.15616438,1.,0. 
...1.63758022,array([0,0]),<function _passthrough_scorer>,array([ 9,10,11,12,13,14,15,16,...,164,165,166,167,168,169,170,171]),2,3,4,5,6,7,8]),{'n_Forests': 10,'n_TreesInForest': 50},{}),{'error_score': 'raise','return_parameters': True})] 73 74 def __len__(self): 75 return self._size 76 ........................................................................... /home/sulantha/anaconda3/lib/python3.4/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0=<list_iterator object>) 67 def __init__(self,kwargs in self.items] func = <function _fit_and_score> args = (RUSRandomForestClassifier(n_Forests=10,{}) kwargs = {'error_score': 'raise','return_parameters': True} 73 74 def __len__(self): 75 return self._size 76 ........................................................................... /home/sulantha/anaconda3/lib/python3.4/site-packages/sklearn/cross_validation.py in _fit_and_score(estimator=RUSRandomForestClassifier(n_Forests=10,X=array([[ 82.15616438,y=array([0,scorer=<function _passthrough_scorer>,train=array([ 9,test=array([0,parameters={'n_Forests': 10,fit_params={},return_train_score=False,return_parameters=True,error_score='raise') 1526 1527 try: 1528 if y_train is None: 1529 estimator.fit(X_train,**fit_params) 1530 else: -> 1531 estimator.fit(X_train,**fit_params) estimator.fit = <bound method RUSRandomForestClassifier.fit of R...restClassifier(n_Forests=10,n_TreesInForest=50)> X_train = array([[ 66.14246575,1.36380531]]) y_train = array([1,0]) fit_params = {} 1532 1533 except Exception as e: 1534 if error_score == 'raise': 1535 raise ........................................................................... 
/home/sulantha/PycharmProjects/TwoStepArticle/Python/RUSRandomForest/RUSRandomForestClassifier.py in fit(self=RUSRandomForestClassifier(n_Forests=10,mode=0) 600 if self.debug: 601 pprint(msg) 602 content = msg['content'] TimeoutError: Hub connection request timed out ___________________________________________________________________________ During handling of the above exception,another exception occurred: Traceback (most recent call last): File "/home/sulantha/PycharmProjects/TwoStepArticle/Python/RUSRandomForest/runClassificationHAI2016.py",line 191,in <module> main() File "/home/sulantha/PycharmProjects/TwoStepArticle/Python/RUSRandomForest/runClassificationHAI2016.py",line 163,in main result = doRUSRFC(itemList[0]) File "/home/sulantha/PycharmProjects/TwoStepArticle/Python/RUSRandomForest/runClassificationHAI2016.py",line 77,in doRUSRFC shuffle=True,print_v=True,k=10,tune_params=tune_params,ext_x_test=analysisDict['X_test']) File "/home/sulantha/PycharmProjects/TwoStepArticle/Python/RUSRandomForest/runClassificationHAI2016.py",line 44,in CVJungle rusCV.fit(X_train,Y_train) File "/home/sulantha/anaconda3/lib/python3.4/site-packages/sklearn/grid_search.py",line 804,in fit return self._fit(X,y,ParameterGrid(self.param_grid)) File "/home/sulantha/anaconda3/lib/python3.4/site-packages/sklearn/grid_search.py",line 553,in _fit for parameters in parameter_iterable File "/home/sulantha/anaconda3/lib/python3.4/site-packages/sklearn/externals/joblib/parallel.py",line 812,in __call__ self.retrieve() File "/home/sulantha/anaconda3/lib/python3.4/site-packages/sklearn/externals/joblib/parallel.py",line 762,in retrieve raise exception sklearn.externals.joblib.my_exceptions.JoblibTimeoutError: JoblibTimeoutError ___________________________________________________________________________ --------------------------------------------------------------------------- Sub-process traceback: --------------------------------------------------------------------------- TimeoutError Mon 
Feb 15 13:29:45 2016 PID: 30122 Python 3.4.4: /home/sulantha/anaconda3/bin/python ........................................................................... /home/sulantha/anaconda3/lib/python3.4/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>) 67 def __init__(self,timeout=300) 47 view = c.direct_view() ........................................................................... /home/sulantha/anaconda3/lib/python3.4/site-packages/ipyparallel/client/client.py in __init__(self=<ipyparallel.client.client.Client object>,mode=0) 600 if self.debug: 601 pprint(msg) 602 content = msg['content'] TimeoutError: Hub connection request timed out ___________________________________________________________________________ 解决方法
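scikit-learn教程(tutorial)中指出:"警告:不能嵌套使用并行计算的对象(即n_jobs不等于1)。"(原文:Warning: You cannot nest objects with parallel computing (n_jobs different than 1).)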
因此,似乎所有的并行化都必须在外层循环中进行。 (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容!