How can I select the earliest date from a dataframe after a given cutoff date?
Solution 1:
Proposed solution below. It first filters each flattened series so that only dates satisfying the test condition (being after today) remain,
and then groups by 'State' and uses grouped.first() to pick the next date within each group.
import datetime
# Current local date as an ISO 'YYYY-MM-DD' string, used as the cutoff test date.
today = datetime.date.today().isoformat()
def fill_orders(test_date, orders_df, cutoff_df, deliverydates_df):
    """Attach to each order the next delivery date for its state.

    For every state, the earliest cutoff strictly after ``test_date`` is
    found; then the earliest delivery date on or after that cutoff is
    selected and merged onto the orders.

    Args:
        test_date: Reference date. It is compared directly against the
            values in ``cutoff_df``, so it must be comparable with them
            (e.g. ISO 'YYYY-MM-DD' strings on both sides, or datetimes).
        orders_df: Orders table; must contain a 'State' column.
        cutoff_df: Wide table with a 'State' column; every other column is
            treated as a candidate cutoff date for that state.
        deliverydates_df: Wide table with a 'State' column; every other
            column is treated as a candidate delivery date for that state.

    Returns:
        ``orders_df`` with an added 'deliverydate' column. The merge is an
        inner join, so orders whose state has no qualifying delivery date
        are not present in the result.
    """
    # Flatten the wide cutoff table into one Series indexed by (State, column).
    cutoffs = cutoff_df.set_index('State').stack()

    # Keep only cutoffs after the test date. Sorting before taking the first
    # entry per state guarantees the *earliest* qualifying cutoff is chosen
    # even when the source columns are not in chronological order.
    upcoming = cutoffs[cutoffs > test_date].sort_values()
    next_cutoff = upcoming.groupby(level='State').first().rename('nextcutoff')

    # Flatten the delivery dates the same way, as a one-column frame.
    deliveries = deliverydates_df.set_index('State').stack().to_frame('deliverydate')

    # Broadcast each state's next cutoff onto its delivery rows; the join
    # matches on the shared 'State' index level.
    joined = deliveries.join(next_cutoff)

    # Earliest delivery date that falls on or after the state's next cutoff.
    # Rows without a cutoff compare as False and are dropped here.
    on_or_after_cutoff = joined['nextcutoff'] <= joined['deliverydate']
    eligible = joined.loc[on_or_after_cutoff, 'deliverydate']
    next_delivery = eligible.sort_values().groupby(level='State').first()

    # Attach the per-state delivery date to every order of that state.
    return orders_df.merge(next_delivery, left_on='State', right_index=True)
# Run the lookup for today's date; `orders`, `cutoff` and `deliverydates`
# are expected to be DataFrames defined elsewhere.
fill_orders(
    test_date=today,
    orders_df=orders,
    cutoff_df=cutoff,
    deliverydates_df=deliverydates,
)