Follow

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use
Contact

TypeError: expected at most 1 arguments, got 2……. data = collections.OrderedDict('data', distributed_data[i])

I am getting the following error related to this function definition what is wrong?

Convert_to_client_data() is a function in federated learning where I am trying to convert a dataset into the federated dataset.

Here is the declaration of the class Distribute which is used in the function which gives the error

MEDevel.com: Open-source for Healthcare and Education

Collecting and validating open-source software for healthcare, education, enterprise, development, medical imaging, medical records, and digital pathology.

Visit Medevel

#Declaration of Class Distribute

def partition_list (list_in, n):
    random.shuffle(list_in)
    return [list_in[i::n] for i in range(n)]

class Distribute:
    def __init__(self, data, data_type):

        self.data = data
        self.data_type = data_type.lower()
        self.selected_feature = -1
        self.type = 'iid'
        self.client_no = 10
        self.data_sample_fraction = 0.1
        self.min_user_number = 10
        self.max_user_number = 20
        self.train_data_fraction = 0.9
        self.random_sampling_seed = 4
        self.random_split_seed = 1
        self.split_type = 'sample'

    def __shuffle(self, data, label):
        random.Random(self.random_sampling_seed).shuffle(data)

    def _iid_no_clint(self):
        size = random.randrange(2, len(self.data))
        self.__shuffle(self.data)

        glist = []
        group_size = int(len(self.data) / size)
        for i in range(size):
            glist.append(self.data[group_size * i: group_size * (i + 1)])

        return glist

    def _iid_clint(self, number_of_clients):

        self.__shuffle(self.data)

        glist = []
        group_size = int(len(self.data) / number_of_clients)

        for i in range(number_of_clients):
            glist.append(self.data[group_size * i: group_size * (i + 1)])

        return glist

    def _iid(self, **kwargs):
        number_of_clients = kwargs.get('number_of_clients')
        if number_of_clients:
            return self._iid_clint(number_of_clients)
        else:
            return self._iid_no_clint()

    def _niid(self, **kwargs):

        selected_feature = kwargs.get('selected_feature', self.selected_feature)
        min_user_number = kwargs.get('min_user_number', self.min_user_number)
        max_user_number = kwargs.get('max_user_number', self.max_user_number)
        number_of_clients = kwargs.get('number_of_clients')

        data_type = kwargs.get('data_type')

        if data_type == 'image':
            if number_of_clients:
                if number_of_clients > len(self.data):
                    raise ValueError('Total number of data:', len(self.data),
                                     'is less than total number of clients specified:', number_of_clients)
                else:
                    data = self.__select_feature_image_client(number_of_clients)
            else:
                data = self.__select_feature_image_no_client(min_user_number, max_user_number)

        elif data_type == 'text':
            if number_of_clients:
                if number_of_clients > len(self.data):
                    raise ValueError('Total number of data:', len(self.data),
                                     'is less than total number of clients specified:', number_of_clients)
                else:
                    data = self.__select_feature_text_client(number_of_clients)
            else:
                data = self.__select_feature_text_no_client(min_user_number, max_user_number)

        elif data_type == 'csv':
            if number_of_clients:
                if number_of_clients > len(self.data):
                    raise ValueError('Total number of data:', len(self.data),
                                     'is less than total number of clients specified:', number_of_clients)
                else:
                    data = self.__select_feature_csv_client(number_of_clients)
            else:
                data = self.__select_feature_csv_no_client(min_user_number, max_user_number)
        else:
            raise ValueError(
                f'Given data type: "{data_type}" is not correct, choose between options "text" or "image".')

        return data

    def distribute_data(self, **kwargs):
        if kwargs.get('dist_type', self.type) == 'iid':
            return self._iid(**kwargs)
        else:
            return self._niid(**kwargs)

    def __select_feature_image_no_client(self, min_user_number, max_user_number):

        client_size = random.randint(min_user_number, max_user_number)
        grouped_data = partition_list (self.data, client_size)

        return grouped_data

    def __select_feature_image_client(self, number_of_clients):

        grouped_data = np.array_split(self.data, number_of_clients)

        return grouped_data

    def __select_feature_text_no_client(self, min_user_number, max_user_number):

        client_size = random.randint(min_user_number, max_user_number)
        grouped_data = partition_list (self.data, client_size)

        return grouped_data

    def __select_feature_text_client(self, number_of_clients):

        grouped_data = np.array_split(self.data, number_of_clients)

        return grouped_data

    def __select_feature_csv_no_client(self, min_user_number, max_user_number):

        client_size = random.randint(min_user_number, max_user_number)
        grouped_data = partition_list (self.data, client_size)

        return grouped_data

    def __select_feature_csv_client(self, number_of_clients):

        grouped_data = np.array_split(self.data, number_of_clients)

        return grouped_data

        
    def split_data(self, x, y, **kwargs):
        train_data_fraction = kwargs.get('train_data_fraction', self.train_data_fraction)
        if kwargs.get('type', self.type) == 'sample':
            return self._sample_split(x, y, train_data_fraction)
        else:
            return self._user_split(train_data_fraction)

    def _user_split(self, train_data_fraction):
        rng_seed = (self.random_split_seed if (self.random_split_seed is not None and self.random_split_seed >= 0)
                    else int(time.time()))
        rng = random.Random(rng_seed)
        # randomly sample from user_files to pick training set users
        num_users = self.client_no
        num_train_users = int(train_data_fraction * num_users)
        indices = [i for i in range(num_users)]
        train_indices = rng.sample(indices, num_train_users)
        train_blist = [False for i in range(num_users)]
        for i in train_indices:
            train_blist[i] = True
        train_user_files = []
        test_user_files = []
        train_labels = []
        test_labels = []

        for i in range(num_users):
            if train_blist[i]:
                train_user_files.append(self.data[i])
                train_labels.append(self.label[i])
            else:
                test_user_files.append(self.data[i])
                test_labels.append(self.label[i])

        return train_user_files, test_user_files, train_labels, test_labels

    def _sample_split(self, x, y, train_data_fraction):
        x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=train_data_fraction)
        return x_train, x_test, y_train, y_test


#DATA variable


    data_type = 'text'
    input_path = '/content/drive/MyDrive/Divya-Yasaman/v2/data/text/topics_sample'  # accepts either folder or csv file
    
    obj = Reader(data_type, input_path)
    
    %%time
    data = obj.read_data()
  

#function DEFINITION which gives the error

def convert_to_client_data(data, data_type, **kwargs):

    distributor_obj = Distribute(data, data_type)

    distributed_data = distributor_obj.distribute_data(data_type=data_type, **kwargs)
    

    client_train_dataset = collections.OrderedDict()

    for i in range(len(distributed_data)):
        client_name = "client_" + str(i)
        data = collections.OrderedDict('data', distributed_data[i])
       # data = collections.OrderedDict( distributed_data[i])
        client_train_dataset[client_name] = data

    print(f'Converting data to {len(distributed_data)} client data...')

    train_dataset = tff.simulation.datasets.TestClientData(client_train_dataset)

    print(f'Data successfully converted to {len(distributed_data)} client data.')

    return train_dataset

ERROR STATEMENT for the function definition

<decorator-gen-53> in time(self, line, cell, local_ns)

<timed exec> in <module>()

<ipython-input-60-7b390d37230c> in convert_to_client_data(data, data_type, **kwargs)
     13     for i in range(len(distributed_data)):
     14         client_name = "client_" + str(i)
---> 15         data = collections.OrderedDict('data', distributed_data[i])
     16        # data = collections.OrderedDict( distributed_data[i])
     17         client_train_dataset[client_name] = data

TypeError: expected at most 1 arguments, got 2

>Solution :

collections.OrderedDict() takes the same arguments as dict(): a sequence of key/value pairs to put in the dictionary. It doesn’t take the key and value as separate arguments.

If data is supposed to be the key, don’t put it as a separate argument.

data = collections.OrderedDict([('data', distributed_data[i])])

Also, as of Python 3.6, regular dictionaries retain their insertion order, so you may not need to use OrderedDict. Just write:

data = {'data': distributed_data[i]}
Add a comment

Leave a Reply

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use

Discover more from Dev solutions

Subscribe now to keep reading and get access to the full archive.

Continue reading