dataframe uploaded
-
The data frame consist of 3 columns latitude , longitude , & response . The objective
is to traverse the Response column and in that there is an estimate key -
estimate key contains multiple arrays , on which i have to pick the (store external id & provider) .
-
while traversing the imp point is (lat&lng) of that particular row have to get mapped with the (store external id & provider ) only with that row .
for example :
data frame image
look at the second row it contains the value ( 34.081613, -84.278921)
printing the estimate response
{'store-boundary-dsp': {'boundaries': [{'createdAt': '2020-07-15T19:21:03.000Z',
'name': '801-Primary',
'selected': True,
'storeExternalId': '801',
'tags': []},
{'createdAt': '2021-09-14T15:24:28.000Z',
'name': '801-Primary',
'storeExternalId': '801',
'tags': ['shiptprimary']}],
'distances': [{'distance': 2.6, 'storeExternalId': '801', 'unit': 'miles'}],
'errors': [],
'estimates': [{'_id': None,
'amount': 800,
'expires': None,
'fee': None,
'noEstimate': False,
'orderId': '5232197187827800',
'orderType': None,
'provider': 'Instacart',
'ruleApplied': False,
'serviceType': 'Delivery',
'storeExternalId': '801',
'supportsAlternateLocation': False,
'type': 'delivery'},
{'_id': None,
'noEstimate': False,
'orderId': '5232197187827800',
'orderType': None,
'provider': 'Shipt',
'serviceType': 'Delivery',
'storeExternalId': '801',
'supportsAlternateLocation': False,
'type': 'delivery'}],
'fulfillments': [{'provider': 'Instacart',
'serviceType': 'Delivery',
'storeExternalId': '801',
'type': 'delivery'},
{'provider': 'Shipt',
'serviceType': 'Delivery',
'storeExternalId': '801',
'type': 'delivery'}],
'value': ['801']}}
-> storing as per requirement ( store external id & provider) & it will look like
(801,instacart) & (801,shipt) now this have to get mapped with the lat longs of 1st row .
& dataframe have to be created .
data frame
34.081613, -84.278921 ,801 ,instacart
34.081613, -84.278921 ,801 ,shipt
custom function for this above problem
issue which i am facing is it is getting mapped with all of the rows ,
it must map with only that particular row.
def extract_details_sb_dsp_positive(response):
res = []
# Check to ensure first two keys are in the response, otherwise empty list
if not 'store-boundary-dsp' in response:
return res
if not 'estimates' in response['store-boundary-dsp']:
return res
tuple_da_required=[]
count_rows=0
for i in range(len(data_metering)):
x=data_metering.loc[i, "LATITUDE"]
y=data_metering.loc[i, "LONGITUDE"]
list_store=[x,y]
#print(list_da)
# Iterate through each estimate
for estimate in response['store-boundary-dsp']['estimates']:
# Iterate through each key, value pair for the estimate
store_id = ''
providor = ''
for k, v in estimate.items():
#keep the count of no of estimates
#based on the count copy the lat,lngs,city,state, country,tenantid
if k == 'storeExternalId':
store_id = v
if k == 'provider':
providor = v
res.append([store_id, providor,list_store])
return res
# For each cell in the 'RESPONSE' column, extract out the store_id, provider pair
response_values = data_metering['RESPONSE'].apply(extract_details_sb_dsp_positive).tolist()
pair_values = [val for sublist in response_values for val in sublist]
pair_values
>Solution :
You can try something like this :
from ast import literal_eval
gh_link = "https://raw.githubusercontent.com/ajayvd/DOUBT_SO/main/so_doubt.csv"
df = pd.read_csv(gh_link).assign(RESPONSE= lambda x: x["RESPONSE"].apply(literal_eval))
out = df.join(pd.json_normalize(df.pop("RESPONSE"), record_path=["store-boundary-dsp", "estimates"],)
[["provider", "storeExternalId"]])
Output :
print(out)
LATITUDE LONGITUDE provider storeExternalId
0 37.671080 -92.660949 DoorDash 7510
1 34.081613 -84.278921 Instacart 801
2 40.750311 -84.116898 Shipt 801
3 33.164890 -80.034914 DoorDash 1035
4 34.692532 -82.810008 DoorDash 4883