How can I find all datetimes in a list within n minutes of a given value?

Solution 1:

If I understand correctly, you want to compare every value in the first list against every value in the second, so you need to check all combinations.

Please read the end of the answer for a note on datetime/timedelta.

Using itertools.product:

import datetime
from itertools import product

list_a = ['10:26:42', '8:55:43', '7:34:11']
list_b = ['10:49:20', '8:51:10', '10:34:35', '8:39:47', '7:11:49', '7:42:10']


def str2time(s):
    """Parse an 'H:M:S' string into a datetime object."""
    return datetime.datetime.strptime(s, "%H:%M:%S")


# Parse both lists up front, then test every (a, b) combination.
times_a = [str2time(t) for t in list_a]
times_b = [str2time(t) for t in list_b]

for a, b in product(times_a, times_b):
    gap_seconds = abs(a - b).total_seconds()
    if gap_seconds > 1800:
        # More than 30 minutes apart: not a match
        continue
    print(f'{a:%H:%M:%S} is within 30m of {b:%H:%M:%S}')

output:

10:26:42 is within 30m of 10:49:20
10:26:42 is within 30m of 10:34:35
08:55:43 is within 30m of 08:51:10
08:55:43 is within 30m of 08:39:47
07:34:11 is within 30m of 07:11:49
07:34:11 is within 30m of 07:42:10

Using nested for loops:

import datetime


def str2time(s):
    """Parse an 'H:M:S' string into a datetime object."""
    return datetime.datetime.strptime(s, "%H:%M:%S")


# One output line per entry of list_a that has at least one match in list_b;
# entries without any match print nothing.
for a in map(str2time, list_a):
    matches = [
        f'{b:%H:%M:%S}'
        for b in map(str2time, list_b)
        if abs(a - b).total_seconds() <= 1800
    ]
    if matches:
        print(f'{a:%H:%M:%S} is within 30m of ' + ', '.join(matches))

output:

10:26:42 is within 30m of 10:49:20, 10:34:35
08:55:43 is within 30m of 08:51:10, 08:39:47
07:34:11 is within 30m of 07:11:49, 07:42:10

note on datetime

Using datetime without date will default to 1900-01-01, which can have edge effects close to midnight. Instead, you could use timedelta objects. With my code you need to change the str2time function to:

def str2time(s):
    """Parse an 'H:M:S' string into a ``datetime.timedelta``.

    Args:
        s: Time of day as colon-separated integers, e.g. ``'8:55:43'``.

    Returns:
        A ``datetime.timedelta`` with the given hours, minutes and seconds.

    Raises:
        ValueError: If ``s`` does not consist of exactly three
            colon-separated integer fields.
    """
    # Distinct names so the parsed seconds do not shadow the parameter ``s``
    hours, minutes, seconds = map(int, s.split(':'))
    return datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)

And alter the code slightly so that the timedelta values can still be formatted as strings:

# Anchor datetime: adding a timedelta to it gives a datetime, which accepts
# the %H:%M:%S format spec used below.
z = datetime.datetime(1900, 1, 1)

# One output line per entry of list_a that has at least one match in list_b.
for a in map(str2time, list_a):
    matches = [
        f'{z+b:%H:%M:%S}'
        for b in map(str2time, list_b)
        if abs(a - b).total_seconds() <= 1800
    ]
    if matches:
        print(f'{z+a:%H:%M:%S} is within 30m of ' + ', '.join(matches))

Solution 2:

You loop and look at the absolute time difference of all pairs of elements instead of using min:

import datetime

list_a = ["10:26:42", "8:55:43", "7:34:11"]
list_b = ["10:49:20", "8:51:10", "10:34:35", "8:39:47", "7:11:49", "7:42:10"]

# Convert the lists to datetime objects
list_a = [datetime.datetime.strptime(d, "%H:%M:%S") for d in list_a]
list_b = [datetime.datetime.strptime(d, "%H:%M:%S") for d in list_b]

# Built once instead of on every inner-loop iteration
max_diff = datetime.timedelta(minutes=30)

for value in list_a:
    for v in list_b:
        diff = value - v
        if abs(diff) < max_diff:
            # Floor division: a negative difference rounds down (-7.9 -> -8.0)
            print(value, "=>", v, "diff: ", diff.total_seconds() // 60)
    # Blank line separates the matches of consecutive list_a entries
    print()
            

Output:

1900-01-01 10:26:42 => 1900-01-01 10:49:20 diff:  -23.0
1900-01-01 10:26:42 => 1900-01-01 10:34:35 diff:  -8.0

1900-01-01 08:55:43 => 1900-01-01 08:51:10 diff:  4.0
1900-01-01 08:55:43 => 1900-01-01 08:39:47 diff:  15.0

1900-01-01 07:34:11 => 1900-01-01 07:11:49 diff:  22.0
1900-01-01 07:34:11 => 1900-01-01 07:42:10 diff:  -8.0

This will go wrong for times like 0:05:00 and 23:55:00: both are parsed onto the same date (1900-01-01), so they appear to be 23:50:00 apart even though they are only 10 minutes apart across midnight.

You can fix that with a self-written delta calculation:

def abs_time_diff(dt1, dt2, *, ignore_date = False):
    """Return the absolute difference between ``dt1`` and ``dt2``.

    With ``ignore_date=True`` the difference is also evaluated with ``dt1``
    moved one day back and one day forward, and the smallest of the three
    absolute differences is returned, so times on either side of midnight
    compare as close together.
    """
    same_day = abs(dt1 - dt2)
    if not ignore_date:
        return same_day

    one_day = datetime.timedelta(days = 1)
    # Candidates: dt1 on the day before, on its own day, and on the day after
    candidates = [
        abs(dt1 - one_day - dt2),
        same_day,
        abs(dt1 + one_day - dt2),
    ]
    return min(candidates)

def _parse_times(strings):
    """Parse a list of 'H:M:S' strings into datetime objects."""
    return [datetime.datetime.strptime(d, "%H:%M:%S") for d in strings]


list_a = _parse_times(["0:5:0"])
list_b = _parse_times(["0:20:0", "23:55:0"])

for value in list_a:
    for v in list_b:
        # First line: plain difference; second line: date ignored
        for skip_date in (False, True):
            print (value, v, abs_time_diff(value, v, ignore_date = skip_date))

Output:

1900-01-01 00:05:00 1900-01-01 00:20:00 0:15:00
1900-01-01 00:05:00 1900-01-01 00:20:00 0:15:00

1900-01-01 00:05:00 1900-01-01 23:55:00 23:50:00 # with date
1900-01-01 00:05:00 1900-01-01 23:55:00 0:10:00  # ignores date

Solution 3:

I'll go out on a limb and suggest using pandas for this:

# Convert to pandas datetime series
import pandas as pd
dt_a = pd.Series(list_a, dtype='datetime64[ns]')
dt_b = pd.Series(list_b, dtype='datetime64[ns]')

# Comparison loop
interv_size = '30m'   # Thirty minutes
# Explicit Timedelta, so the comparison below does not depend on pandas
# coercing a plain string
max_gap = pd.Timedelta(interv_size)
for el in dt_a:
    # Boolean mask over dt_b (the series defined above; the original referred
    # to an undefined df_b); .dt.time drops the date part for display
    hits = dt_b.loc[abs(el - dt_b) < max_gap].dt.time
    print(f'{el.time()} is within {interv_size} of', *hits)

The advantage? You let pandas deal with parsing the format of your dates.

Solution 4:

from datetime import datetime, timedelta

list_a = ["10:26:42", "8:55:43", "7:34:11"]
list_b = ["10:49:20", "8:51:10", "10:34:35", "8:39:47", "7:11:49", "7:42:10"]

time_format = "%H:%M:%S"


def convert_to_datetime(time_str):
    """Parse a time string written in ``time_format`` into a datetime."""
    return datetime.strptime(time_str, time_format)


# Rebind list_a and list_b to their parsed, sorted form.
# Sorting list_b is what allows the inner loop to stop early (see below).
list_a = sorted(convert_to_datetime(time_str) for time_str in list_a)
list_b = sorted(convert_to_datetime(time_str) for time_str in list_b)

time_range_limit_in_seconds = timedelta(minutes=30).total_seconds()

# One (time, [matching times]) pair per entry of list_a, all as strings
result = []
for list_a_datetime in list_a:
    with_in_time_limit = []
    for list_b_datetime in list_b:
        # Signed difference: negative when the list_b entry is the later one
        difference_in_seconds = (
            list_a_datetime - list_b_datetime).total_seconds()

        # list_b is ascending, so once an entry lies more than the limit
        # AFTER the current time, every remaining entry does too.
        if difference_in_seconds < -time_range_limit_in_seconds:
            break

        # Compare the magnitude: entries far before the current time must be
        # rejected just like entries far after it.
        if abs(difference_in_seconds) <= time_range_limit_in_seconds:
            # Convert back to string
            with_in_time_limit.append(
                list_b_datetime.strftime(time_format)
            )

    list_a_str = list_a_datetime.strftime(time_format)
    result.append((list_a_str, with_in_time_limit))
    print(list_a_str, with_in_time_limit)