How can I select the earliest date from a dataframe after a given cutoff date?

Solution 1:

The proposed solution is below. It groups by 'State' to get the next item in each group, calling grouped.first() after filtering the series so that only dates which satisfy the test condition (being after today) remain.

import datetime
# Current local date rendered as a 'YYYY-MM-DD' string.
today = format(datetime.datetime.today(), '%Y-%m-%d')


def fill_orders(test_date, orders_df, cutoff_df, deliverydates_df):
    """Attach the next available delivery date to every order.

    For each state, the earliest cutoff date strictly after ``test_date`` is
    selected, followed by the earliest delivery date that falls on or after
    that cutoff. The resulting per-state delivery date is merged onto
    ``orders_df``.

    Args:
        test_date: Reference date. Only cutoffs strictly greater than this
            value are considered, so it must be comparable with the values
            stored in ``cutoff_df``.
        orders_df: DataFrame of orders; must contain a ``'State'`` column.
        cutoff_df: Wide DataFrame with a ``'State'`` column; every other
            column is treated as a cutoff date for that state.
        deliverydates_df: Wide DataFrame with a ``'State'`` column; every
            other column is treated as a delivery date for that state.

    Returns:
        ``orders_df`` with an added ``'deliverydate'`` column. The merge uses
        pandas' default inner join, so orders whose state has no qualifying
        delivery date are not present in the result.
    """
    # Flatten the wide cutoff table into a single Series indexed by
    # (State, original column label).
    cutoffs_series = cutoff_df.set_index('State').stack()

    # Keep only cutoffs after test_date. Sorting before grouping makes
    # first() return the earliest date per state even when the source
    # columns are not in chronological order.
    upcoming_cutoffs = cutoffs_series[cutoffs_series > test_date].sort_values()
    next_cutoff = (
        upcoming_cutoffs.groupby(level='State').first().rename('nextcutoff')
    )

    # Flatten the delivery dates the same way, as a one-column frame.
    flat_delivery_dates = (
        deliverydates_df.set_index('State').stack().to_frame('deliverydate')
    )

    # Attach each state's next cutoff to all of its delivery dates; the join
    # aligns on the shared 'State' index level.
    joined = flat_delivery_dates.join(next_cutoff)

    # Earliest delivery date on or after the next cutoff. States without a
    # next cutoff carry a missing value here and are filtered out by the
    # comparison.
    eligible = joined.loc[joined['nextcutoff'] <= joined['deliverydate']]
    next_delivery = (
        eligible.sort_values('deliverydate')
        .groupby(level='State')['deliverydate']
        .first()
    )

    # Join to orders
    return orders_df.merge(next_delivery, left_on='State', right_index=True)

# Example invocation using today's date as the reference point.
# NOTE(review): `orders`, `cutoff` and `deliverydates` are not defined in this
# snippet; presumably they are DataFrames created beforehand. Confirm.
fill_orders(
    test_date=today,
    orders_df=orders,
    cutoff_df=cutoff,
    deliverydates_df=deliverydates,
)