Follow

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use
Contact

Using a target size (torch.Size([16])) that is different to the input size (torch.Size([16, 2])) is deprecated

I am trying to build a multiclass text classification using Pytorch and torchtext. but I am recieving this error when ever output in last hidden layer is 2, but running fine on 1 outputdim. I know there is a problem with batchsize and Data shape. Please Suggest me what to do as I dont know the fix.
Constructing iterator:

#set batch size
BATCH_SIZE = 16

train_iterator, valid_iterator = BucketIterator.splits(
    (train_data, valid_data), 
    batch_size = BATCH_SIZE,
    sort_key = lambda x: len(x.text),
    sort_within_batch=True,
    device = device)

Model class:

class classifier(nn.Module):

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, 
                 bidirectional, dropout):
        super(classifier,self).__init__()          
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        
        self.gru = nn.GRU(embedding_dim, 
                           hidden_dim, 
                           num_layers=n_layers, 
                           bidirectional=bidirectional, 
                           dropout=dropout,
                           batch_first=True)
        
        self.fc1 = nn.Linear(hidden_dim * 2, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(64, 16)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(16, output_dim)
        self.act = nn.Sigmoid()
        
    def forward(self, text, text_lengths):

        embedded = self.embedding(text)
        #embedded = [batch size, sent_len, emb dim]
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths.to('cpu'),batch_first=True)

        packed_output, hidden = self.gru(packed_embedded)

        hidden = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1)
                
        dense_1=self.fc1(hidden)
        x = self.relu1(dense_1)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        x = self.relu3(x)
        dense_outputs = self.fc4(x)

        #Final activation function
        outputs=self.act(dense_outputs)
        
        return outputs

instantiating the model:

MEDevel.com: Open-source for Healthcare and Education

Collecting and validating open-source software for healthcare, education, enterprise, development, medical imaging, medical records, and digital pathology.

Visit Medevel

size_of_vocab = len(TEXT.vocab)
embedding_dim = 300
num_hidden_nodes = 256
num_output_nodes = 2
num_layers = 4
bidirection = True
dropout = 0.2

model = classifier(size_of_vocab, embedding_dim, num_hidden_nodes,num_output_nodes, num_layers, 
                   bidirectional = True, dropout = dropout).to(device)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
    
print(f'The model has {count_parameters(model):,} trainable parameters')

pretrained_embeddings = TEXT.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)

print(pretrained_embeddings.shape)

Optimizer and criterion used:

optimizer = optim.Adam(model.parameters())
criterion = nn.BCELoss()
model = model.to(device)
criterion = criterion.to(device)

Training function:

import torchmetrics as tm
metrics = tm.Accuracy()

def train(model, iterator, optimizer, criterion):
    
    #initialize every epoch 
    epoch_loss = 0
    epoch_acc = 0
    
    #set the model in training phase
    model.train()  
    
    for batch in iterator:
        
        #resets the gradients after every batch
        optimizer.zero_grad()   
        
        #retrieve text and no. of words
        text, text_lengths = batch.text   
        
        #convert to 1D tensor
        predictions = model(text, text_lengths).squeeze()  
        
        #compute the loss
        loss = criterion(predictions, batch.label)        
        
        #compute the binary accuracy
        # acc = binary_accuracy(predictions, batch.label)  
        acc = metrics(predictions,batch.label)

        #backpropage the loss and compute the gradients
        loss.backward()       
        
        #update the weights
        optimizer.step()      
        
        #loss and accuracy
        epoch_loss += loss.item()  
        epoch_acc += acc.item()    
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

Full error

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-60-eeabf5bacadf> in <module>()
      5 
      6     #train the model
----> 7     train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
      8 
      9     #evaluate the model

3 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in binary_cross_entropy(input, target, weight, size_average, reduce, reduction)
   2906         raise ValueError(
   2907             "Using a target size ({}) that is different to the input size ({}) is deprecated. "
-> 2908             "Please ensure they have the same size.".format(target.size(), input.size())
   2909         )
   2910 

ValueError: Using a target size (torch.Size([16])) that is different to the input size (torch.Size([16, 2])) is deprecated. Please ensure they have the same size.

>Solution :

What you want is CrossEntropyLoss instead of BCELoss.

Add a comment

Leave a Reply

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use

Discover more from Dev solutions

Subscribe now to keep reading and get access to the full archive.

Continue reading