TypeError: take(): argument 'index' (position 1) must be Tensor, not numpy.ndarray

10,428

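Convert your inputs and labels to np.ndarray (see examples here). scikit-learn's cross-validation code indexes the data with NumPy index arrays (via X.take(indices, axis=0)), which torch.Tensor.take does not accept; that is what raises the TypeError.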

These will be cast to torch.Tensor automatically by skorch when needed.

All in all change your

inputs = Variable(x_traintensor)
labels = Variable(y_traintensor)

to:

inputs = x_traintensor.numpy() # assuming x is torch.Tensor
labels = y_traintensor.numpy() # assuming y is torch.Tensor

By the way, torch.autograd.Variable is deprecated; if you need gradients, use torch.tensor(data, requires_grad=True) instead. In this case, inputs and labels do not need gradients, so Variable is even more out of place.

Share:
10,428
vanessa orani
Author by

vanessa orani

Updated on June 21, 2022

Comments

  • vanessa orani
    vanessa orani almost 2 years

    I'm new to PyTorch. I'm trying to do cross-validation, and I found the skorch library, which allows users to use sklearn functions with a torch model. So, I defined a neural network class:

    torch.manual_seed(42)
    
    class Netcross(nn.Module):
        """Small fully connected network: 5 inputs -> 30 -> 30 -> 1 output.

        The first hidden layer uses a Tanh activation (despite the ``sig1``
        attribute name), the second a Sigmoid, and the single output unit is
        passed through a Sigmoid, so the returned values lie in (0, 1).
        """
    
        def __init__(self):
            """Create the three linear layers and their activation modules."""
            super().__init__()
            # First hidden layer: 5 input features -> 30 units.
            self.fc1 = nn.Linear(5,30)
            # NOTE: named ``sig1`` but this is a Tanh, not a Sigmoid.
            self.sig1 = nn.Tanh()
            #self.dout = nn.Dropout(0.2)
            # Second hidden layer: 30 -> 30 units, Sigmoid activation.
            self.fc2 = nn.Linear(30,30)
            self.sig2 = nn.Sigmoid()
            # Output layer: 30 -> 1 unit, squashed to (0, 1) by a Sigmoid.
            self.out = nn.Linear(30, 1)
            self.out_act = nn.Sigmoid()
            #self.fc1.weight = torch.nn.Parameter(torch.rand(50,5))
    
        def forward(self, x):
            """Apply the layers in sequence and return the sigmoid output.

            ``x`` is fed to ``fc1``, which expects 5 features in its last
            dimension; the result has 1 feature in its last dimension.
            """
            x = self.fc1(x)
            x = self.sig1(x)
            #x = self.dout(x)
            x = self.fc2(x)
            x = self.sig2(x)
            x = self.out(x)
            y = self.out_act(x)
    
            return y
    
    crossnet1 = NeuralNet(
        Netcross,
        max_epochs = 5,
        criterion=torch.nn.BCELoss,
        #user defined coeff.
        callbacks = [epoch_acc, epoch_f1, epoch_phi], 
        optimizer=torch.optim.SGD,
        optimizer__momentum=0.9,
        lr=0.85,
    )
    inputs = Variable(x_traintensor)
    labels = Variable(y_traintensor)
    
    crossnet1.fit(inputs, labels)
    

    So far everything is fine: the function returns credible results without any errors. The problem appears when I try to use the GridSearchCV function:

    from sklearn.model_selection import GridSearchCV
    
    param_grid = {'max_epochs':[5, 10, 20], 
                  'lr': [0.1, 0.65, 0.8],
                 }
    
    gs = GridSearchCV(estimator = crossnet1, param_grid = param_grid, refit = False, cv = 3, scoring = "accuracy")
    
    gs.fit(inputs, labels)
    

    I get the following error:

    TypeError                                 Traceback (most recent call last)
    <ipython-input-41-e1f3dbd9a2b0> in <module>
          3 labels1 = torch.from_numpy(np.array(labels))
          4 
    ----> 5 gs.fit(inputs1, labels1)
    
    ~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
        720                 return results_container[0]
        721 
    --> 722             self._run_search(evaluate_candidates)
        723 
        724         results = results_container[0]
    
    ~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in _run_search(self, evaluate_candidates)
       1189     def _run_search(self, evaluate_candidates):
       1190         """Search all candidates in param_grid"""
    -> 1191         evaluate_candidates(ParameterGrid(self.param_grid))
       1192 
       1193 
    
    ~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in evaluate_candidates(candidate_params)
        709                                for parameters, (train, test)
        710                                in product(candidate_params,
    --> 711                                           cv.split(X, y, groups)))
        712 
        713                 all_candidate_params.extend(candidate_params)
    
    ~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
        915             # remaining jobs.
        916             self._iterating = False
    --> 917             if self.dispatch_one_batch(iterator):
        918                 self._iterating = self._original_iterator is not None
        919 
    
    ~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
        757                 return False
        758             else:
    --> 759                 self._dispatch(tasks)
        760                 return True
        761 
    
    ~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
        714         with self._lock:
        715             job_idx = len(self._jobs)
    --> 716             job = self._backend.apply_async(batch, callback=cb)
        717             # A job can complete so quickly than its callback is
        718             # called before we get here, causing self._jobs to
    
    ~\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
        180     def apply_async(self, func, callback=None):
        181         """Schedule a func to be run"""
    --> 182         result = ImmediateResult(func)
        183         if callback:
        184             callback(result)
    
    ~\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
        547         # Don't delay the application, to avoid keeping the input
        548         # arguments in memory
    --> 549         self.results = batch()
        550 
        551     def get(self):
    
    ~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
        223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
        224             return [func(*args, **kwargs)
    --> 225                     for func, args, kwargs in self.items]
        226 
        227     def __len__(self):
    
    ~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
        223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
        224             return [func(*args, **kwargs)
    --> 225                     for func, args, kwargs in self.items]
        226 
        227     def __len__(self):
    
    ~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
        516     start_time = time.time()
        517 
    --> 518     X_train, y_train = _safe_split(estimator, X, y, train)
        519     X_test, y_test = _safe_split(estimator, X, y, test, train)
        520 
    
    ~\Anaconda3\lib\site-packages\sklearn\utils\metaestimators.py in _safe_split(estimator, X, y, indices, train_indices)
        201             X_subset = X[np.ix_(indices, train_indices)]
        202     else:
    --> 203         X_subset = safe_indexing(X, indices)
        204 
        205     if y is not None:
    
    ~\Anaconda3\lib\site-packages\sklearn\utils\__init__.py in safe_indexing(X, indices)
        214                                    indices.dtype.kind == 'i'):
        215             # This is often substantially faster than X[indices]
    --> 216             return X.take(indices, axis=0)
        217         else:
        218             return X[indices]
    
    TypeError: take(): argument 'index' (position 1) must be Tensor, not numpy.ndarray
    

    What is wrong?