Follow

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use
Contact

Getting error in the calculation in pandas

I am getting calculation errors when I use the groupby function with an aggregate function inside a loop. Outside the loop, however, everything is fine and I get the correct results.

import pandas as pd
import numpy as np

# Example DataFrame, written row by row: two label columns followed by three
# integer measures.
_sample_columns = ['GroupA', 'GroupB', 'POP', 'LF', 'WRK']
_sample_rows = [
    ('A', 'X', 10, 1, 100),
    ('A', 'Y', 20, 2, 200),
    ('B', 'Z', 30, 3, 300),
    ('B', 'X', 40, 4, 400),
    ('B', 'Y', 50, 5, 500),
    ('C', 'X', 60, 6, 600),
]
df = pd.DataFrame(_sample_rows, columns=_sample_columns)

# Grouping configurations to report on; the first entry has no keys at all.
groupby_cols = [
    [],
    ['GroupA'],
    ['GroupB'],
    ['GroupA', 'GroupB'],
]

def test(df, gby):
    """Aggregate POP/LF/WRK for one grouping configuration and print the table.

    Reads the module-level ``groupby_cols`` list and requires ``df`` to already
    contain a ``CMULT`` column. Nothing is returned; the result is only printed.

    NOTE(review): in the loop below, POP/LF/WRK are already multiplied by CMULT
    in the ``i == 0`` branch before this function is ever called, so the
    lambdas here apply the weight a second time (e.g. 10 * 0.5 * 0.5 = 2.5 for
    group A/X instead of 5.0). That is the difference from the standalone run.
    """
    # Select the key columns for this run; gby indexes the module-level list.
    groupby_columns = groupby_cols[gby]
    # In each lambda, x is one group's slice of the value column while
    # df['CMULT'] is the whole column of the frame passed in; the product
    # relies on pandas aligning the two on the row index.
    w2 = df.groupby(groupby_columns).agg(
        pophat=('POP', lambda x: np.sum(x * df['CMULT'])),
        lfhat=('LF', lambda x: np.sum(x * df['CMULT'])),
        wrkhat=('WRK', lambda x: np.sum(x * df['CMULT']))
    ).reset_index()

    # Attach a CMULT column to the aggregated table. The lookup table is keyed
    # by GroupA labels, so with the single key 'GroupB' the labels X/Y/Z find
    # no match and the column comes out as NaN.
    if len(groupby_columns) == 1:
        w2['CMULT'] = w2[groupby_columns[0]].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
    else:
        w2['CMULT'] = w2['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})


    print(w2, groupby_columns)

# Run every grouping configuration in turn.
for i in range(len(groupby_cols)):
    if i == 0:
        # Index 0 is the empty key list: no groupby, just whole-frame totals.
        # These assignments overwrite df in place, so every later iteration
        # (and test()) sees POP/LF/WRK that are already multiplied by CMULT.
        df['CMULT'] = df['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
        df['POP'] = pd.to_numeric(df['POP']) * df['CMULT']
        df['LF'] = pd.to_numeric(df['LF']) * df['CMULT']
        df['WRK'] = pd.to_numeric(df['WRK']) * df['CMULT']
        # Every row gets the row count; the 'count' aggregate below then
        # counts those rows.
        df['no_sam'] = df.shape[0]

        agg_dict = {'POP': 'sum', 'LF': 'sum', 'WRK': 'sum', 'no_sam': 'count'}
        # Aggregate over the whole frame and turn the resulting Series into a
        # one-row table.
        w2 = df.agg(agg_dict).to_frame().T

        print(w2, groupby_cols[i])

    else:
        # Grouped configurations are handled by test(), which multiplies by
        # CMULT again inside its aggregation lambdas.
        test(df, i)

This is the code in which I am getting calculation errors. The results are:

POP   LF    WRK  no_sam
0  63.0  6.3  630.0     6.0 []
  GroupA  pophat  lfhat  wrkhat  CMULT
0      A     7.5   0.75    75.0    0.5
1      B    10.8   1.08   108.0    0.3
2      C     2.4   0.24    24.0    0.2 ['GroupA']
  GroupB  pophat  lfhat  wrkhat  CMULT
0      X     8.5   0.85    85.0    NaN
1      Y     9.5   0.95    95.0    NaN
2      Z     2.7   0.27    27.0    NaN ['GroupB']
  GroupA GroupB  pophat  lfhat  wrkhat  CMULT
0      A      X     2.5   0.25    25.0    0.5
1      A      Y     5.0   0.50    50.0    0.5
2      B      X     3.6   0.36    36.0    0.3
3      B      Y     4.5   0.45    45.0    0.3
4      B      Z     2.7   0.27    27.0    0.3
5      C      X     2.4   0.24    24.0    0.2 ['GroupA', 'GroupB']

But outside the loop the results are as follows (you can verify this by changing the index used in `groupby_cols[i]`):

MEDevel.com: Open-source for Healthcare and Education

Collecting and validating open-source software for healthcare, education, enterprise, development, medical imaging, medical records, and digital pathology.

Visit Medevel

import pandas as pd
import numpy as np

# Example DataFrame
df = pd.DataFrame({
    'GroupA': ['A', 'A', 'B', 'B', 'B', 'C'],
    'GroupB': ['X', 'Y', 'Z', 'X', 'Y', 'X'],
    'POP': [10, 20, 30, 40, 50, 60],
    'LF': [1, 2, 3, 4, 5, 6],
    'WRK': [100, 200, 300, 400, 500, 600]
})

groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]

# CMULT is a per-row weight looked up from the row's GroupA label. The lookup
# is element-wise, so it needs no groupby/transform; the previous version also
# indexed groupby_cols with a loop variable `i` that does not exist in this
# standalone script, which raised NameError.
df['CMULT'] = df['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})

# Perform groupby and aggregation based on the chosen groupby configuration.
# Change the index (1, 2 or 3) to try another configuration; index 0 is the
# empty key list, which the looped version handles separately without groupby.
# POP/LF/WRK are still the raw values here, so the weight is applied exactly
# once, inside the lambdas (x is one group's slice, aligned with the full
# CMULT column on the row index).
w2 = df.groupby(groupby_cols[3]).agg(
    pophat=('POP', lambda x: np.sum(x * df['CMULT'])),
    lfhat=('LF', lambda x: np.sum(x * df['CMULT'])),
    wrkhat=('WRK', lambda x: np.sum(x * df['CMULT']))
).reset_index()

print(w2)

         POP       LF       WRK         no_sam
 0       63.0      6.3      630.0       6.0 

  GroupA  pophat  lfhat  wrkhat
0      A    15.0    1.5   150.0
1      B    36.0    3.6   360.0
2      C    12.0    1.2   120.0

  GroupB  pophat  lfhat  wrkhat
0      X    29.0    2.9   290.0
1      Y    25.0    2.5   250.0
2      Z     9.0    0.9    90.0

  GroupA GroupB  pophat  lfhat  wrkhat
0      A      X     5.0    0.5    50.0
1      A      Y    10.0    1.0   100.0
2      B      X    12.0    1.2   120.0
3      B      Y    15.0    1.5   150.0
4      B      Z     9.0    0.9    90.0
5      C      X    12.0    1.2   120.0

So, am I misunderstanding groupby and aggregation, given that the code does not work inside the loop? Or do these functions behave differently inside a loop? I am puzzled as to how that is possible.

>Solution :

IIUC, multiply the columns by CMULT once, before the loop, and then aggregate with a plain sum only:

# Grouping configurations to report on, one key list per run.
groupby_cols = [
    [],
    ['GroupA'],
    ['GroupB'],
    ['GroupA', 'GroupB'],
]

def test(df, gby):
    """Print the summed POP/LF/WRK totals for one grouping configuration.

    ``df`` is expected to hold value columns that were already multiplied by
    CMULT, so a plain ``sum`` is all that is needed here; applying the weight
    again inside the aggregation would count it twice.

    Args:
        df: Frame with the grouping columns and the ``POP``, ``LF`` and
            ``WRK`` value columns.
        gby: Index into the module-level ``groupby_cols`` list. It must select
            a non-empty key list; the driver loop handles index 0 itself.

    Returns:
        None. The aggregated table is printed together with its key columns.
    """
    groupby_columns = groupby_cols[gby]
    w2 = df.groupby(groupby_columns).agg(
        pophat=('POP', 'sum'),
        lfhat=('LF', 'sum'),
        wrkhat=('WRK', 'sum'),
    ).reset_index()

    # The weight table is keyed by GroupA labels, so CMULT can only be
    # attached when GroupA is one of the grouping keys. Checking membership
    # (rather than the number of keys) avoids pushing GroupB labels through
    # the GroupA table and avoids a KeyError for key lists without GroupA.
    if 'GroupA' in groupby_columns:
        w2['CMULT'] = w2['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
    else:
        # No GroupA key: there is no single weight per group, so mark it
        # missing (this matches the NaN the old lookup produced for GroupB).
        w2['CMULT'] = float('nan')

    print(w2, groupby_columns)
    

# Weight the value columns exactly once, up front, so that every grouping
# below can use a plain sum.
df['CMULT'] = df['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
for _value_col in ('POP', 'LF', 'WRK'):
    df[_value_col] = pd.to_numeric(df[_value_col]) * df['CMULT']
df['no_sam'] = df.shape[0]

for i, _keys in enumerate(groupby_cols):
    if i != 0:
        # Grouped configurations are delegated to test().
        test(df, i)
        continue

    # First configuration has no grouping keys: report whole-frame totals
    # as a one-row table.
    agg_dict = {'POP': 'sum', 'LF': 'sum', 'WRK': 'sum', 'no_sam': 'count'}
    w2 = df.agg(agg_dict).to_frame().T

    print(w2, _keys)
Add a comment

Leave a Reply

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use

Discover more from Dev solutions

Subscribe now to keep reading and get access to the full archive.

Continue reading