Follow

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use
Contact

How to put values on a single raw from multiple columns in Pandas

I have been scratching my head for days about this problem. Please, find below the structure of my input data and the output that I want.
I color-coded per ID, Plot, Survey, Trial and the 3 estimation methods.
In the output, I want to get all the scorings for each group, which are represented by color, on the same row. By doing that, we should get rid of the Estimation Method column in the output. I kept it for sake of clarity.

This is my code. Thank you in advance for your time.

import pandas as pd
import functools
data_dict = {'ID': {0: 'id1',
  1: 'id1',
  2: 'id1',
  3: 'id1',
  4: 'id1',
  5: 'id1',
  6: 'id1',
  7: 'id1',
  8: 'id1',
  9: 'id1',
  10: 'id1',
  11: 'id1',
  12: 'id1',
  13: 'id1',
  14: 'id1',
  15: 'id1',
  16: 'id1',
  17: 'id1',
  18: 'id1',
  19: 'id1',
  20: 'id1',
  21: 'id1',
  22: 'id1',
  23: 'id1'},
 'Plot': {0: 'p1',
  1: 'p1',
  2: 'p1',
  3: 'p1',
  4: 'p1',
  5: 'p1',
  6: 'p1',
  7: 'p1',
  8: 'p1',
  9: 'p1',
  10: 'p1',
  11: 'p1',
  12: 'p1',
  13: 'p1',
  14: 'p1',
  15: 'p1',
  16: 'p1',
  17: 'p1',
  18: 'p1',
  19: 'p1',
  20: 'p1',
  21: 'p1',
  22: 'p1',
  23: 'p1'},
 'Survey': {0: 'Sv1',
  1: 'Sv1',
  2: 'Sv1',
  3: 'Sv1',
  4: 'Sv1',
  5: 'Sv1',
  6: 'Sv2',
  7: 'Sv2',
  8: 'Sv2',
  9: 'Sv2',
  10: 'Sv2',
  11: 'Sv2',
  12: 'Sv1',
  13: 'Sv1',
  14: 'Sv1',
  15: 'Sv1',
  16: 'Sv1',
  17: 'Sv1',
  18: 'Sv2',
  19: 'Sv2',
  20: 'Sv2',
  21: 'Sv2',
  22: 'Sv2',
  23: 'Sv2'},
 'Trial': {0: 't1',
  1: 't1',
  2: 't1',
  3: 't2',
  4: 't2',
  5: 't2',
  6: 't1',
  7: 't1',
  8: 't1',
  9: 't2',
  10: 't2',
  11: 't2',
  12: 't1',
  13: 't1',
  14: 't1',
  15: 't2',
  16: 't2',
  17: 't2',
  18: 't1',
  19: 't1',
  20: 't1',
  21: 't2',
  22: 't2',
  23: 't2'},
 'Mission': {0: 'mission1',
  1: 'mission1',
  2: 'mission1',
  3: 'mission1',
  4: 'mission1',
  5: 'mission1',
  6: 'mission1',
  7: 'mission1',
  8: 'mission1',
  9: 'mission1',
  10: 'mission1',
  11: 'mission2',
  12: 'mission2',
  13: 'mission2',
  14: 'mission2',
  15: 'mission2',
  16: 'mission2',
  17: 'mission2',
  18: 'mission2',
  19: 'mission2',
  20: 'mission2',
  21: 'mission2',
  22: 'mission2',
  23: 'mission2'},
 'Estimation Method': {0: 'MCARI2',
  1: 'NDVI',
  2: 'NDRE',
  3: 'MCARI2',
  4: 'NDVI',
  5: 'NDRE',
  6: 'MCARI2',
  7: 'NDVI',
  8: 'NDRE',
  9: 'MCARI2',
  10: 'NDVI',
  11: 'NDRE',
  12: 'MCARI2',
  13: 'NDVI',
  14: 'NDRE',
  15: 'MCARI2',
  16: 'NDVI',
  17: 'NDRE',
  18: 'MCARI2',
  19: 'NDVI',
  20: 'NDRE',
  21: 'MCARI2',
  22: 'NDVI',
  23: 'NDRE'},
 'MCARI2_sd': {0: 1.5,
  1: np.nan,
  2: np.nan,
  3: 10.0,
  4: np.nan,
  5: np.nan,
  6: 1.5,
  7: np.nan,
  8: np.nan,
  9: 10.0,
  10: np.nan,
  11: np.nan,
  12: 101.0,
  13: np.nan,
  14: np.nan,
  15: 23.5,
  16: np.nan,
  17: np.nan,
  18: 111.0,
  19: np.nan,
  20: np.nan,
  21: 72.0,
  22: np.nan,
  23: np.nan},
 'MACRI2_50': {0: 12.4,
  1: np.nan,
  2: np.nan,
  3: 11.0,
  4: np.nan,
  5: np.nan,
  6: 12.4,
  7: np.nan,
  8: np.nan,
  9: 11.0,
  10: np.nan,
  11: np.nan,
  12: 102.0,
  13: np.nan,
  14: np.nan,
  15: 2.1,
  16: np.nan,
  17: np.nan,
  18: 112.0,
  19: np.nan,
  20: np.nan,
  21: 74.0,
  22: np.nan,
  23: np.nan},
 'MACRI2_AVG': {0: 15.0,
  1: np.nan,
  2: np.nan,
  3: 12.0,
  4: np.nan,
  5: np.nan,
  6: 15.0,
  7: np.nan,
  8: np.nan,
  9: 12.0,
  10: np.nan,
  11: np.nan,
  12: 103.0,
  13: np.nan,
  14: np.nan,
  15: 24.0,
  16: np.nan,
  17: np.nan,
  18: 113.0,
  19: np.nan,
  20: np.nan,
  21: 77.0,
  22: np.nan,
  23: np.nan},
 'NDVI_sd': {0: np.nan,
  1: 2.9,
  2: np.nan,
  3: np.nan,
  4: 20.0,
  5: np.nan,
  6: np.nan,
  7: 2.9,
  8: np.nan,
  9: np.nan,
  10: 20.0,
  11: np.nan,
  12: np.nan,
  13: 201.0,
  14: np.nan,
  15: np.nan,
  16: 11.0,
  17: np.nan,
  18: np.nan,
  19: 200.0,
  20: np.nan,
  21: np.nan,
  22: 32.0,
  23: np.nan},
 'NDVI_50': {0: np.nan,
  1: 21.0,
  2: np.nan,
  3: np.nan,
  4: 21.0,
  5: np.nan,
  6: np.nan,
  7: 21.0,
  8: np.nan,
  9: np.nan,
  10: 21.0,
  11: np.nan,
  12: np.nan,
  13: 201.0,
  14: np.nan,
  15: np.nan,
  16: 12.0,
  17: np.nan,
  18: np.nan,
  19: 300.0,
  20: np.nan,
  21: np.nan,
  22: 39.0,
  23: np.nan},
 'NDVI_AVG': {0: np.nan,
  1: 27.0,
  2: np.nan,
  3: np.nan,
  4: 22.0,
  5: np.nan,
  6: np.nan,
  7: 27.0,
  8: np.nan,
  9: np.nan,
  10: 22.0,
  11: np.nan,
  12: np.nan,
  13: 203.0,
  14: np.nan,
  15: np.nan,
  16: 13.0,
  17: np.nan,
  18: np.nan,
  19: 400.0,
  20: np.nan,
  21: np.nan,
  22: 40.0,
  23: np.nan},
 'NDRE_sd': {0: np.nan,
  1: np.nan,
  2: 3.1,
  3: np.nan,
  4: np.nan,
  5: 31.0,
  6: np.nan,
  7: np.nan,
  8: 3.1,
  9: np.nan,
  10: np.nan,
  11: 31.0,
  12: np.nan,
  13: np.nan,
  14: 301.0,
  15: np.nan,
  16: np.nan,
  17: 15.0,
  18: np.nan,
  19: np.nan,
  20: 57.0,
  21: np.nan,
  22: np.nan,
  23: 21.0},
 'NDRE_50': {0: np.nan,
  1: np.nan,
  2: 33.0,
  3: np.nan,
  4: np.nan,
  5: 32.0,
  6: np.nan,
  7: np.nan,
  8: 33.0,
  9: np.nan,
  10: np.nan,
  11: 32.0,
  12: np.nan,
  13: np.nan,
  14: 302.0,
  15: np.nan,
  16: np.nan,
  17: 16.0,
  18: np.nan,
  19: np.nan,
  20: 58.0,
  21: np.nan,
  22: np.nan,
  23: 22.0},
 'NDRE_AVG': {0: np.nan,
  1: np.nan,
  2: 330.0,
  3: np.nan,
  4: np.nan,
  5: 33.0,
  6: np.nan,
  7: np.nan,
  8: 330.0,
  9: np.nan,
  10: np.nan,
  11: 33.0,
  12: np.nan,
  13: np.nan,
  14: 303.0,
  15: np.nan,
  16: np.nan,
  17: 17.0,
  18: np.nan,
  19: np.nan,
  20: 59.0,
  21: np.nan,
  22: np.nan,
  23: 32.0}}

df_test = pd.DataFrame(data_dict)


def generate_data_per_EM(df):
    data_survey = []
    for (survey,mission,trial,em),data in df.groupby(['Survey','Mission','Trial','Estimation Method']):               
        df_em = data.set_index('ID').dropna(axis=1)
        df_em.to_csv(f'tmp_data_{survey}_{mission}_{trial}_{em}.csv') #This generates 74 files, but not sure how to join/merge them
        data_survey.append(df_em)

    #Merge the df_em column-wise
    df_final = functools.reduce(lambda left, right: pd.merge(left, right, on=['ID','Survey','Mission','Trial']), data_survey)
    df_final.to_csv(f'final_{survey}_{mission}_{em}.csv') #Output is not what I expected

generate_data_per_EM(df_test) 

my data and the output

MEDevel.com: Open-source for Healthcare and Education

Collecting and validating open-source software for healthcare, education, enterprise, development, medical imaging, medical records, and digital pathology.

Visit Medevel

>Solution :

IIUC, you need a groupby:

(df_test
.groupby(['ID', 'Plot', 'Survey', 'Trial','Mission'], as_index=False, sort=False)
.first(numeric_only=True)

    ID Plot Survey Trial   Mission  MCARI2_sd  MACRI2_50  MACRI2_AVG  NDVI_sd  NDVI_50  NDVI_AVG  NDRE_sd  NDRE_50  NDRE_AVG
0  id1   p1    Sv1    t1  mission1        1.5       12.4        15.0      2.9     21.0      27.0      3.1     33.0     330.0
1  id1   p1    Sv1    t2  mission1       10.0       11.0        12.0     20.0     21.0      22.0     31.0     32.0      33.0
2  id1   p1    Sv2    t1  mission1        1.5       12.4        15.0      2.9     21.0      27.0      3.1     33.0     330.0
3  id1   p1    Sv2    t2  mission1       10.0       11.0        12.0     20.0     21.0      22.0      NaN      NaN       NaN
4  id1   p1    Sv2    t2  mission2       72.0       74.0        77.0     32.0     39.0      40.0     31.0     32.0      33.0
5  id1   p1    Sv1    t1  mission2      101.0      102.0       103.0    201.0    201.0     203.0    301.0    302.0     303.0
6  id1   p1    Sv1    t2  mission2       23.5        2.1        24.0     11.0     12.0      13.0     15.0     16.0      17.0
7  id1   p1    Sv2    t1  mission2      111.0      112.0       113.0    200.0    300.0     400.0     57.0     58.0      59.0
Add a comment

Leave a Reply

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use

Discover more from Dev solutions

Subscribe now to keep reading and get access to the full archive.

Continue reading