我无法让简单的神经网络与自定义虚拟数据集一起运行。您可以在此问题的最底部找到错误消息。
我想深入了解PyTorch是如何处理数据输入的,并因此构建了一个简单的数据集,该数据集将两系列布尔值(编码为0和1)作为输入,将OR、AND和XOR系列作为目标。所有内容都应该使用自定义Dataset和DataLoader (出于学习目的)。输入数据如下所示(自然,一次只使用一个目标列):
column_1 column_2 or and xor
0 1 1 1 1 0
1 0 1 1 0 1
2 0 1 1 0 1
3 0 1 1 0 1
4 1 0 1 0 1
... ... ... .. ... ...
9995 1 1 1 1 0
9996 1 0 1 0 1
9997 1 0 1 0 1
9998 0 1 1 0 1
9999 0 1 1 0 1
所以我想构建一个神经网络来表示OR门,AND门,或者XOR门。
有人能解释一下为什么迭代器似乎不接受一维数据吗?看起来像是假设了类似图像的数据?有没有可能用一个定制的Dataset和一个DataLoader来解决这个问题,或者我是否必须妥协才能不使用DataLoader (就像在this intro to logic in PyTorch中那样)?
以XOR为目标列的最小工作示例:
# Imports
from pandas import read_csv, DataFrame
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as torch_nn
import torch.nn.functional as torch_functional
import torchvision
import random
# Classes
class CustomDataset(Dataset):
def __init__(self, data, target_column_name, transform=None):
self.dataframe = data
self.x = data[['column_1', 'column_2']].values
self.y = data[[target_column_name]].values
self.n_samples = len(data)
self.transform = transform
def __len__(self):
return self.n_samples
def __getitem__(self, index):
x = self.x[index]
y = self.y[index]
if not self.transform == None:
return (self.transform(x), self.transform(y))
return (x, y)
class CustomNet(torch_nn.Module):
def __init__(self):
super().__init__()
self.fully_connected_input_layer = torch_nn.Linear(2, 5)
self.fully_connected_hidden_layer_1 = torch_nn.Linear(5, 5)
self.fully_connected_hidden_layer_2 = torch_nn.Linear(5, 5)
self.fully_connected_output_layer = torch_nn.Linear(5, 2)
def forward(self, data):
data = torch_functional.relu(self.fully_connected_input_layer(data))
data = torch_functional.relu(self.fully_connected_hidden_layer_1(data))
data = torch_functional.relu(self.fully_connected_hidden_layer_2(data))
data = torch_functional.log_softmax(
self.fully_connected_output_layer(data),
dim=1
)
# data = data.squeeze(1)
return data
# Global variables
NUMBER_OF_OBSERVATIONS = 10000
N_TRAIN_OBSERVATIONS = 7000
BATCH_SIZE = 4
N_EPOCHS = 3
random.seed(42)
# Generating logical gate data
## Generating two columns with random 0s and 1s.
df_data = DataFrame({
'column_1': random.choices([0, 1], k=NUMBER_OF_OBSERVATIONS),
'column_2': random.choices([0, 1], k=NUMBER_OF_OBSERVATIONS)
})
## Adding the logic gate results of the previously generated two columns.
df_data.loc[:,'or'] = (df_data['column_1'] == 1) | (df_data['column_2'] == 1)
df_data.loc[:,'or'] = df_data.loc[:,'or'].astype(int)
df_data.loc[:,'and'] = (df_data['column_1'] == 1) & (df_data['column_2'] == 1).astype(int)
df_data.loc[:,'and'] = df_data.loc[:,'and'].astype(int)
df_data.loc[:,'xor'] = ((df_data['column_1'] == 1) & (df_data['column_2'] != 1)) | ((df_data['column_1'] != 1) & (df_data['column_2'] == 1)).astype(int)
df_data.loc[:,'xor'] = df_data.loc[:,'xor'].astype(int)
print(df_data.info())
print(df_data)
# Instantiating a CustomDataSet object and a DataLoader object.
dataset_data = CustomDataset(df_data, target_column_name='xor', transform=torchvision.transforms.ToTensor())
dataset_data_train, dataset_data_test = torch.utils.data.random_split(
dataset_data,
[N_TRAIN_OBSERVATIONS, len(dataset_data)-N_TRAIN_OBSERVATIONS]
)
dataloader_data_train = DataLoader(
dataset=dataset_data_train,
batch_size=BATCH_SIZE,
shuffle=True
)
dataloader_data_test = DataLoader(
dataset=dataset_data_test,
batch_size=BATCH_SIZE,
shuffle=True
)
# Instantiating the neural network
custom_net = CustomNet()
# Running one epoch
for epoch in range(N_EPOCHS):
for data in dataloader_data_train:
X, y = data[0].float(), data[1].float()
net.zero_grad()
output = net(X)
print(output)
print(y)
loss = torch_functional.nll_loss(output, y)
loss.backward()
optimizer.step()
break
错误消息:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-2-f2833deb4b11> in <module>
95 # Running one epoch
96 for epoch in range(N_EPOCHS):
---> 97 for data in dataloader_data_train:
98 X, y = data[0].float(), data[1].float()
99 net.zero_grad()
~/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
343
344 def __next__(self):
--> 345 data = self._next_data()
346 self._num_yielded += 1
347 if self._dataset_kind == _DatasetKind.Iterable and \
~/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
383 def _next_data(self):
384 index = self._next_index() # may raise StopIteration
--> 385 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
386 if self._pin_memory:
387 data = _utils.pin_memory.pin_memory(data)
~/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
42 def fetch(self, possibly_batched_index):
43 if self.auto_collation:
---> 44 data = [self.dataset[idx] for idx in possibly_batched_index]
45 else:
46 data = self.dataset[possibly_batched_index]
~/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in <listcomp>(.0)
42 def fetch(self, possibly_batched_index):
43 if self.auto_collation:
---> 44 data = [self.dataset[idx] for idx in possibly_batched_index]
45 else:
46 data = self.dataset[possibly_batched_index]
~/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataset.py in __getitem__(self, idx)
255
256 def __getitem__(self, idx):
--> 257 return self.dataset[self.indices[idx]]
258
259 def __len__(self):
<ipython-input-2-f2833deb4b11> in __getitem__(self, index)
26
27 if not self.transform == None:
---> 28 return (self.transform(x), self.transform(y))
29 return (x, y)
30
~/anaconda3/lib/python3.7/site-packages/torchvision/transforms/transforms.py in __call__(self, pic)
90 Tensor: Converted image.
91 """
---> 92 return F.to_tensor(pic)
93
94 def __repr__(self):
~/anaconda3/lib/python3.7/site-packages/torchvision/transforms/functional.py in to_tensor(pic)
43
44 if _is_numpy(pic) and not _is_numpy_image(pic):
---> 45 raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndim))
46
47 if isinstance(pic, np.ndarray):
ValueError: pic should be 2/3 dimensional. Got 1 dimensions.
发布于 2020-06-14 20:47:13
问题是您使用的是Torchvision库中的一个专门的转换例程torchvision.transforms.ToTensor
。您应该只使用torch.from_numpy
。
还要注意的是,Pandas对象上的.values
已被弃用。您应该改用.to_numpy
:
import pandas as pd
import torch
x_pandas = pd.Series([0.0, 0.5, 1.0])
x_numpy = x_pandas.to_numpy()
x_torch = torch.from_numpy(x_numpy)
https://stackoverflow.com/questions/62372055
复制相似问题