首页
学习
活动
专区
圈层
工具
发布
首页
学习
活动
专区
圈层
工具
MCP广场
社区首页 >问答首页 >即使没有使用图像,PyTorch也需要在DataLoader中具有类似图像的维度

即使没有使用图像,PyTorch也需要在DataLoader中具有类似图像的维度
EN

Stack Overflow用户
提问于 2020-06-14 19:46:22
回答 1查看 130关注 0票数 0

我无法让简单的神经网络与自定义虚拟数据集一起运行。您可以在此问题的最底部找到错误消息。

我想深入了解PyTorch是如何处理数据输入的,因此构建了一个简单的数据集:它以两列布尔值(编码为0和1)作为输入,以对应的OR、AND和XOR结果列作为目标。出于学习目的,整个流程都应通过自定义Dataset和DataLoader完成。输入数据如下所示(当然,每次只使用其中一个目标列):

代码语言:javascript
运行
复制
      column_1  column_2  or  and  xor
0            1         1   1    1    0
1            0         1   1    0    1
2            0         1   1    0    1
3            0         1   1    0    1
4            1         0   1    0    1
...        ...       ...  ..  ...  ...
9995         1         1   1    1    0
9996         1         0   1    0    1
9997         1         0   1    0    1
9998         0         1   1    0    1
9999         0         1   1    0    1

所以我想构建一个神经网络来表示OR门,AND门,或者XOR门。

有人能解释一下为什么迭代器似乎不接受一维数据吗?它看起来像是假定输入是类似图像的数据。这个问题能否在保留自定义Dataset和DataLoader的前提下解决,还是我必须妥协、放弃使用DataLoader(就像“this intro to logic in PyTorch”这篇文章中那样)?

以XOR为目标列的最小工作示例:

代码语言:python
运行
复制
# Imports
from pandas import read_csv, DataFrame
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as torch_nn
import torch.nn.functional as torch_functional
import torchvision
import random


# Classes
class CustomDataset(Dataset):
    """Dataset over a frame with two input columns and one target column.

    Each item is an ``(x, y)`` pair taken from the same row: ``x`` comes from
    the ``'column_1'`` / ``'column_2'`` columns and ``y`` from the chosen
    target column.
    """

    def __init__(self, data, target_column_name, transform=None):
        """Cache the input and target values of ``data``.

        Args:
            data: Frame that can be indexed with a list of column names and
                exposes ``.values`` (presumably a pandas DataFrame -- confirm
                against the caller).
            target_column_name: Name of the column in ``data`` used as ``y``.
            transform: Optional callable applied separately to ``x`` and to
                ``y`` in ``__getitem__``; ``None`` returns them unchanged.
        """
        self.dataframe = data
        self.x = data[['column_1', 'column_2']].values
        # List-style selection, so .values is taken from a one-column frame
        # rather than from a single column object.
        self.y = data[[target_column_name]].values
        self.n_samples = len(data)
        self.transform = transform

    def __len__(self) -> int:
        """Return the number of rows captured at construction time."""
        return self.n_samples

    def __getitem__(self, index):
        """Return the ``(x, y)`` pair at ``index``, transformed if requested."""
        x = self.x[index]
        y = self.y[index]

        # NOTE(review): `is not None` is the idiomatic None check (PEP 8).
        if not self.transform == None:
            return (self.transform(x), self.transform(y))
        return (x, y)

class CustomNet(torch_nn.Module):
    """Small fully connected classifier: 2 inputs -> 5 -> 5 -> 5 -> 2 outputs.

    The first three linear layers are each followed by a ReLU; the last one
    is followed by a log-softmax over ``dim=1``.
    """

    def __init__(self):
        super().__init__()
        # Layers are registered in forward order under their original names.
        self.fully_connected_input_layer = torch_nn.Linear(
            in_features=2, out_features=5
        )
        self.fully_connected_hidden_layer_1 = torch_nn.Linear(
            in_features=5, out_features=5
        )
        self.fully_connected_hidden_layer_2 = torch_nn.Linear(
            in_features=5, out_features=5
        )
        self.fully_connected_output_layer = torch_nn.Linear(
            in_features=5, out_features=2
        )

    def forward(self, data):
        """Return the log-softmax (over ``dim=1``) of the network output."""
        relu_stages = (
            self.fully_connected_input_layer,
            self.fully_connected_hidden_layer_1,
            self.fully_connected_hidden_layer_2,
        )
        activations = data
        for layer in relu_stages:
            activations = torch_functional.relu(layer(activations))

        logits = self.fully_connected_output_layer(activations)
        return torch_functional.log_softmax(logits, dim=1)


# Global variables
NUMBER_OF_OBSERVATIONS = 10000  # total number of generated rows
N_TRAIN_OBSERVATIONS = 7000  # rows assigned to the training split
BATCH_SIZE = 4
N_EPOCHS = 3
random.seed(42)  # fixed seed so the generated columns are reproducible


# Generating logical gate data
## Generating two columns with random 0s and 1s.
df_data = DataFrame({
    'column_1': random.choices([0, 1], k=NUMBER_OF_OBSERVATIONS),
    'column_2': random.choices([0, 1], k=NUMBER_OF_OBSERVATIONS),
})
## Adding the logic gate results of the previously generated two columns.
first_is_set = df_data['column_1'] == 1
second_is_set = df_data['column_2'] == 1
# .astype(int) is applied to the whole boolean expression (a method call
# binds tighter than & and |), so every gate column is created directly as
# 0/1 integers instead of being stored as booleans and overwritten afterwards.
df_data['or'] = (first_is_set | second_is_set).astype(int)
df_data['and'] = (first_is_set & second_is_set).astype(int)
# XOR: exactly one of the two inputs is set.
df_data['xor'] = (first_is_set != second_is_set).astype(int)
# DataFrame.info() prints its summary itself and returns None, so it is not
# wrapped in print().
df_data.info()
print(df_data)

# Instantiating a CustomDataSet object and a DataLoader object.
# The dataset uses 'xor' as its target column and is given torchvision's
# ToTensor as the transform that CustomDataset applies to every x and y.
# NOTE(review): the traceback reported with this snippet ends inside this
# ToTensor call ("pic should be 2/3 dimensional. Got 1 dimensions.").
dataset_data = CustomDataset(df_data, target_column_name='xor', transform=torchvision.transforms.ToTensor())
# Random split: N_TRAIN_OBSERVATIONS samples for training, the rest for testing.
dataset_data_train, dataset_data_test = torch.utils.data.random_split(
    dataset_data,
    [N_TRAIN_OBSERVATIONS, len(dataset_data)-N_TRAIN_OBSERVATIONS]
)
# Both loaders are configured with BATCH_SIZE and shuffle=True.
dataloader_data_train = DataLoader(
    dataset=dataset_data_train, 
    batch_size=BATCH_SIZE, 
    shuffle=True
)
dataloader_data_test = DataLoader(
    dataset=dataset_data_test, 
    batch_size=BATCH_SIZE, 
    shuffle=True
)

# Instantiating the neural network
custom_net = CustomNet()

# Training loop: iterates over N_EPOCHS epochs, but the `break` at the end of
# the batch loop stops each epoch after its first batch.
# NOTE(review): the loop uses `net` and `optimizer`, neither of which is
# defined in the visible snippet; the instance created above is `custom_net`.
for epoch in range(N_EPOCHS):
    for data in dataloader_data_train:
        # Presumably data[0] holds the inputs and data[1] the targets (the
        # dataset returns (x, y) pairs); both are cast to float here.
        X, y = data[0].float(), data[1].float()
        net.zero_grad()
        output = net(X)
        print(output)
        print(y)
        # NOTE(review): y is a float tensor at this point; confirm that
        # nll_loss accepts it (its documentation describes class indices).
        loss = torch_functional.nll_loss(output, y)
        loss.backward()
        optimizer.step()
        break

错误消息:

代码语言:javascript
运行
复制
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-2-f2833deb4b11> in <module>
     95 # Running one epoch
     96 for epoch in range(N_EPOCHS):
---> 97     for data in dataloader_data_train:
     98         X, y = data[0].float(), data[1].float()
     99         net.zero_grad()

~/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
    343 
    344     def __next__(self):
--> 345         data = self._next_data()
    346         self._num_yielded += 1
    347         if self._dataset_kind == _DatasetKind.Iterable and \

~/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
    383     def _next_data(self):
    384         index = self._next_index()  # may raise StopIteration
--> 385         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    386         if self._pin_memory:
    387             data = _utils.pin_memory.pin_memory(data)

~/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

~/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in <listcomp>(.0)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

~/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataset.py in __getitem__(self, idx)
    255 
    256     def __getitem__(self, idx):
--> 257         return self.dataset[self.indices[idx]]
    258 
    259     def __len__(self):

<ipython-input-2-f2833deb4b11> in __getitem__(self, index)
     26 
     27         if not self.transform == None:
---> 28             return (self.transform(x), self.transform(y))
     29         return (x, y)
     30 

~/anaconda3/lib/python3.7/site-packages/torchvision/transforms/transforms.py in __call__(self, pic)
     90             Tensor: Converted image.
     91         """
---> 92         return F.to_tensor(pic)
     93 
     94     def __repr__(self):

~/anaconda3/lib/python3.7/site-packages/torchvision/transforms/functional.py in to_tensor(pic)
     43 
     44     if _is_numpy(pic) and not _is_numpy_image(pic):
---> 45         raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndim))
     46 
     47     if isinstance(pic, np.ndarray):

ValueError: pic should be 2/3 dimensional. Got 1 dimensions.
EN

回答 1

Stack Overflow用户

回答已采纳

发布于 2020-06-14 20:47:13

问题在于您使用了Torchvision库中的一个专用转换例程torchvision.transforms.ToTensor。您应该直接改用torch.from_numpy。

还要注意的是,Pandas对象上的.values已被弃用,您应该改用.to_numpy()。

代码语言:python
运行
复制
import pandas as pd
import torch

# Minimal Series -> NumPy -> tensor conversion that does not use torchvision.
x_pandas = pd.Series([0.0, 0.5, 1.0])
# to_numpy() returns the Series' values as a NumPy array.
x_numpy = x_pandas.to_numpy()
# from_numpy() builds a torch tensor from that array.
x_torch = torch.from_numpy(x_numpy)
票数 1
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/62372055

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档