Merge and find the diff in df

Advertisements

Compare if A column is present in C, if not the type Vnot found. if present then compare B and D. if not present type T not found else NA.

import pandas as pd
data1 = {'A': [1, 2, 3],
         'B': ['p', 's', 'r']}

data2 = {'C': ['3', '2', '5'],
         'D': ['p', 'r', 'q']}

df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)

# Function to compare and generate new column
def compare_columns(row):
    if row['A'] in df2['C'].astype(int).values:
        idx = df2[df2['C'].astype(int) == row['A']].index[0]
        if row['B'] == df2.loc[idx, 'D']:
            return 'NA'
        else:
            return 'T Not Found'
    else:
        return 'V Not Found'

# Apply the function to generate the new column 'E'
df1['E'] = df1.apply(compare_columns, axis=1)
df1.loc[df1['E'] == 'V Not Found', 'E'] = 'V not found'
df1.loc[df1['E'] == 'T Not Found', 'E'] = 'T Not Found'
df1.loc[df1['E'] == 'NA', 'E'] = 'NA'

print(df1)

expected output:

`   A  B            E
0  1  p  V not found
1  2  s  T Not Found
2  3  r  NA

`
i want the expected output as given above but i get

`   A  B            E
0  1  p  V not found
1  2  s  T Not Found
2  3  r  T Not Found`

>Solution :

It seems to me that you can use essentially the same code to test if "A" in "C" and if "B" in "D"

import pandas as pd

df1 = pd.DataFrame({
    'A': [1, 2, 3],
    'B': ['p', 's', 'r']
})

df2 = pd.DataFrame({
    'C': ['3', '2', '5'],
    'D': ['p', 'r', 'q']
})

def compare_columns(row):
    if row['A'] not in df2['C'].astype(int).values:
        return 'V Not Found'
    
    if row['B'] not in df2['D'].astype(str).values:
        return 'T Not Found'

    return 'NA'

df1['E'] = df1.apply(compare_columns, axis=1)

print(df1)

That should give you:

   A  B            E
0  1  p  V Not Found
1  2  s  T Not Found
2  3  r           NA

Leave a ReplyCancel reply