# Getting error in the calculation in pandas

I am getting calculation errors while writing the groupby function with aggregate function in a loop.But, outside the loop everything is okay. Getting the results correctly…!

``````import pandas as pd
import numpy as np

# Example DataFrame
df = pd.DataFrame({
'GroupA': ['A', 'A', 'B', 'B', 'B', 'C'],
'GroupB': ['X', 'Y', 'Z', 'X', 'Y', 'X'],
'POP': [10, 20, 30, 40, 50, 60],
'LF': [1, 2, 3, 4, 5, 6],
'WRK': [100, 200, 300, 400, 500, 600]
})

groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]

def test(df, gby):
# Perform groupby and aggregation
groupby_columns = groupby_cols[gby]
w2 = df.groupby(groupby_columns).agg(
pophat=('POP', lambda x: np.sum(x * df['CMULT'])),
lfhat=('LF', lambda x: np.sum(x * df['CMULT'])),
wrkhat=('WRK', lambda x: np.sum(x * df['CMULT']))
).reset_index()

# Calculate CMULT column based on the current groupby configuration
if len(groupby_columns) == 1:
w2['CMULT'] = w2[groupby_columns[0]].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
else:
w2['CMULT'] = w2['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})

print(w2, groupby_columns)

for i in range(len(groupby_cols)):
if i == 0:
df['CMULT'] = df['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
df['POP'] = pd.to_numeric(df['POP']) * df['CMULT']
df['LF'] = pd.to_numeric(df['LF']) * df['CMULT']
df['WRK'] = pd.to_numeric(df['WRK']) * df['CMULT']
df['no_sam'] = df.shape[0]

agg_dict = {'POP': 'sum', 'LF': 'sum', 'WRK': 'sum', 'no_sam': 'count'}
# Group the data by the current groupby configuration and calculate the aggregates
w2 = df.agg(agg_dict).to_frame().T

print(w2, groupby_cols[i])

else:
test(df, i)
``````

This is the code , in which I am getting calculation errors. The results are –

``````POP   LF    WRK  no_sam
0  63.0  6.3  630.0     6.0 []
GroupA  pophat  lfhat  wrkhat  CMULT
0      A     7.5   0.75    75.0    0.5
1      B    10.8   1.08   108.0    0.3
2      C     2.4   0.24    24.0    0.2 ['GroupA']
GroupB  pophat  lfhat  wrkhat  CMULT
0      X     8.5   0.85    85.0    NaN
1      Y     9.5   0.95    95.0    NaN
2      Z     2.7   0.27    27.0    NaN ['GroupB']
GroupA GroupB  pophat  lfhat  wrkhat  CMULT
0      A      X     2.5   0.25    25.0    0.5
1      A      Y     5.0   0.50    50.0    0.5
2      B      X     3.6   0.36    36.0    0.3
3      B      Y     4.5   0.45    45.0    0.3
4      B      Z     2.7   0.27    27.0    0.3
5      C      X     2.4   0.24    24.0    0.2 ['GroupA', 'GroupB']
``````

But, outside the loop the results are – (You can verify it by changing the index of groupby_cols[NNNNNNNNN])

``````import pandas as pd
import numpy as np

# Example DataFrame
df = pd.DataFrame({
'GroupA': ['A', 'A', 'B', 'B', 'B', 'C'],
'GroupB': ['X', 'Y', 'Z', 'X', 'Y', 'X'],
'POP': [10, 20, 30, 40, 50, 60],
'LF': [1, 2, 3, 4, 5, 6],
'WRK': [100, 200, 300, 400, 500, 600]
})

groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]

df['CMULT'] = df.groupby(groupby_cols[i])['GroupA'].transform(lambda x: x.map({'A': 0.5, 'B': 0.3, 'C': 0.2}))

# Perform groupby and aggregation based on the current groupby configuration

w2 = df.groupby(groupby_cols[3]).agg(
pophat=('POP', lambda x: np.sum(x * df['CMULT'])),
lfhat=('LF', lambda x: np.sum(x * df['CMULT'])),
wrkhat=('WRK', lambda x: np.sum(x * df['CMULT']))
).reset_index()

print(w2)

POP       LF       WRK         no_sam
0       63.0      6.3      630.0       6.0

GroupA  pophat  lfhat  wrkhat
0      A    15.0    1.5   150.0
1      B    36.0    3.6   360.0
2      C    12.0    1.2   120.0

GroupB  pophat  lfhat  wrkhat
0      X    29.0    2.9   290.0
1      Y    25.0    2.5   250.0
2      Z     9.0    0.9    90.0

GroupA GroupB  pophat  lfhat  wrkhat
0      A      X     5.0    0.5    50.0
1      A      Y    10.0    1.0   100.0
2      B      X    12.0    1.2   120.0
3      B      Y    15.0    1.5   150.0
4      B      Z     9.0    0.9    90.0
5      C      X    12.0    1.2   120.0
``````

So, am I not understanding the groupby and aggregation properly because It is not working in the loop or The functions work differently in the loop. I have doubt about it, how can It be possible???

### >Solution :

IIUC multiple columns before loop and then aggregate `sum` only:

``````groupby_cols = [[], ['GroupA'], ['GroupB'], ['GroupA', 'GroupB']]

def test(df, gby):
# print (df)
# Perform groupby and aggregation
groupby_columns = groupby_cols[gby]
w2 = df.groupby(groupby_columns).agg(
pophat=('POP', 'sum'),
lfhat=('LF', 'sum'),
wrkhat=('WRK', 'sum')
).reset_index()
# print (w2)

# Calculate CMULT column based on the current groupby configuration
if len(groupby_columns) == 1:
w2['CMULT'] = w2[groupby_columns[0]].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
else:
w2['CMULT'] = w2['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})

print(w2, groupby_columns)

``````

``````df['CMULT'] = df['GroupA'].map({'A': 0.5, 'B': 0.3, 'C': 0.2})
df['POP'] = pd.to_numeric(df['POP']) * df['CMULT']
df['LF'] = pd.to_numeric(df['LF']) * df['CMULT']
df['WRK'] = pd.to_numeric(df['WRK']) * df['CMULT']
df['no_sam'] = df.shape[0]

for i in range(len(groupby_cols)):
if i == 0:

agg_dict = {'POP': 'sum', 'LF': 'sum', 'WRK': 'sum', 'no_sam': 'count'}
# Group the data by the current groupby configuration and calculate the aggregates
w2 = df.agg(agg_dict).to_frame().T

print(w2, groupby_cols[i])

else:
test(df, i)
``````