How to collapse/aggregate values into several columns of dicts
I want to collapse the rows of a dataframe such that each row represents an hour (the source data goes down to minute granularity). I don't want to lose the data, instead I want to create a dict so that the key of the dict is the minute (the source data doesn't go down to seconds) and then have the value be the value for that minute. See my brute force example at the bottom for what I'm really getting at.
I have data that looks something like this
import pandas as pd
from datetime import datetime, timedelta
import itertools
import random
from copy import deepcopy
fruits=['apple','banana','pear']
dts=[datetime(2022,1,1)+timedelta(minutes=x*random.randint(1,9)) for x in range(48)]
df=pd.DataFrame([x for x in itertools.product(fruits, dts)], columns=['fruit','dt'])
df['value1']=[random.randrange(0,100) for i in range(df.shape[0])]
df['value2']=[random.randrange(0,100) for i in range(df.shape[0])]
df['value3']=[random.randrange(0,100) for i in range(df.shape[0])]
# fruit dt value1 value2 value3
# 0 apple 2022-01-01 00:00:00 56 55 65
# 1 apple 2022-01-01 00:02:00 98 67 16
# 2 apple 2022-01-01 00:12:00 52 19 23
# 3 apple 2022-01-01 00:09:00 0 60 82
# 4 apple 2022-01-01 00:08:00 94 51 22
# ... ... ... ... ... ...
# 139 pear 2022-01-01 00:43:00 14 49 37
# 140 pear 2022-01-01 02:56:00 92 98 73
# 141 pear 2022-01-01 02:15:00 9 5 99
# 142 pear 2022-01-01 03:04:00 92 39 10
# 143 pear 2022-01-01 05:29:00 52 61 65
The best I can do using reasonable syntax is:
df['minute']=[x.minute for x in df.dt]
df['hour']=[x.hour+1 for x in df.dt]
df['date']=[x.replace(hour=0, minute=0) for x in df.dt]
def splitdict(x):
mydict={}
x=list(x)
for elem in x:
mydict[elem[0]]=elem[1]
return(mydict)
df.groupby(['fruit','date','hour']).apply(lambda x: splitdict(zip(df.minute,df.value1)))
Of course, this doesn't actually work (so I'll spare you the output). It returns the same dict for every row and it would only work for value1
(ie. I don't know how to get the results for value1, value2, value3
at once) I thought if I did a deepcopy
that would resolve the repeating dict but it didn't so I took that out.
I did a horrendous for loop to brute force what I want. That looks like this:
#brute force
uniq=df.drop_duplicates(['fruit','date','hour'])
uniq=uniq[['fruit','date','hour']]
results=[]
for index, row in uniq.iterrows():
mydict=(df[(df['date']==row['date']) & (df['hour']==row['hour']) & (df['fruit']==row['fruit'])].loc[:,['minute','value1','value2','value3']]).to_dict('records')
value1dict={mydict[i]['minute']:mydict[i]['value1'] for i in range(len(mydict))}
value2dict={mydict[i]['minute']:mydict[i]['value2'] for i in range(len(mydict))}
value3dict={mydict[i]['minute']:mydict[i]['value3'] for i in range(len(mydict))}
results.append(pd.DataFrame({'date':row['date'], 'hour':row['hour'], 'fruit':row['fruit'] , 'value1':[value1dict], 'value2':[value2dict], 'value3':[value3dict]}))
results=pd.concat(results)
# date hour fruit value1 value2 value3
# 0 2022-01-01 1 apple {0: 56, 2: 98, 12: 52, 9: 0, 8: 94, 30: 90, 48: 92, 21: 16, 45: 9, 40: 58, 24: 31, 13: 45, 28: 86, 57: 47, 56: 66, 34: 69, 39: 12, 41: 92, 43: 12} {0: 55, 2: 67, 12: 19, 9: 60, 8: 51, 30: 95, 48: 62, 21: 97, 45: 1, 40: 89, 24: 52, 13: 5, 28: 67, 57: 16, 56: 13, 34: 54, 39: 86, 41: 45, 43: 50} {0: 65, 2: 16, 12: 23, 9: 82, 8: 22, 30: 76, 48: 98, 21: 23, 45: 14, 40: 87, 24: 45, 13: 87, 28: 29, 57: 75, 56: 26, 34: 25, 39: 70, 41: 97, 43: 89}
# 0 2022-01-01 2 apple {12: 31, 39: 42, 42: 87, 20: 86, 45: 62, 28: 86, 55: 59, 44: 83, 0: 26, 2: 97, 36: 55, 24: 44} {12: 74, 39: 55, 42: 16, 20: 12, 45: 92, 28: 2, 55: 98, 44: 44, 0: 63, 2: 54, 36: 76, 24: 55} {12: 97, 39: 81, 42: 76, 20: 71, 45: 26, 28: 56, 55: 61, 44: 93, 0: 90, 2: 87, 36: 28, 24: 52}
# 0 2022-01-01 3 apple {15: 72, 8: 84, 24: 97, 0: 54, 30: 10, 12: 50, 55: 51, 56: 31} {15: 30, 8: 42, 24: 96, 0: 76, 30: 58, 12: 44, 55: 82, 56: 57} {15: 41, 8: 11, 24: 40, 0: 89, 30: 22, 12: 51, 55: 57, 56: 60}
# 0 2022-01-01 5 apple {3: 14, 56: 79, 0: 25} {3: 3, 56: 61, 0: 49} {3: 2, 56: 29, 0: 32}
# 0 2022-01-01 4 apple {52: 53, 0: 50, 4: 53} {52: 32, 0: 64, 4: 26} {52: 88, 0: 28, 4: 59}
# 0 2022-01-01 6 apple {42: 40, 29: 52} {42: 30, 29: 82} {42: 50, 29: 33}
# 0 2022-01-01 1 banana {0: 6, 2: 10, 12: 43, 9: 77, 8: 19, 30: 59, 48: 67, 21: 17, 45: 92, 40: 69, 24: 25, 13: 86, 28: 81, 57: 72, 56: 35, 34: 22, 39: 61, 41: 8, 43: 56} {0: 63, 2: 92, 12: 49, 9: 22, 8: 2, 30: 92, 48: 96, 21: 21, 45: 62, 40: 23, 24: 77, 13: 41, 28: 64, 57: 49, 56: 30, 34: 59, 39: 63, 41: 54, 43: 85} {0: 27, 2: 83, 12: 35, 9: 37, 8: 70, 30: 94, 48: 16, 21: 19, 45: 71, 40: 5, 24: 26, 13: 91, 28: 16, 57: 42, 56: 8, 34: 31, 39: 93, 41: 57, 43: 65}
# 0 2022-01-01 2 banana {12: 0, 39: 53, 42: 13, 20: 91, 45: 88, 28: 66, 55: 46, 44: 24, 0: 41, 2: 32, 36: 69, 24: 11} {12: 83, 39: 21, 42: 41, 20: 81, 45: 79, 28: 37, 55: 28, 44: 2, 0: 51, 2: 87, 36: 63, 24: 82} {12: 6, 39: 57, 42: 18, 20: 3, 45: 74, 28: 58, 55: 46, 44: 25, 0: 45, 2: 96, 36: 37, 24: 14}
# 0 2022-01-01 3 banana {15: 10, 8: 14, 24: 60, 0: 48, 30: 82, 12: 70, 55: 39, 56: 4} {15: 81, 8: 55, 24: 8, 0: 64, 30: 72, 12: 45, 55: 79, 56: 45} {15: 95, 8: 90, 24: 36, 0: 47, 30: 88, 12: 12, 55: 86, 56: 75}
# 0 2022-01-01 5 banana {3: 47, 56: 39, 0: 60} {3: 60, 56: 15, 0: 72} {3: 48, 56: 86, 0: 16}
# 0 2022-01-01 4 banana {52: 49, 0: 30, 4: 86} {52: 39, 0: 85, 4: 5} {52: 64, 0: 22, 4: 96}
# 0 2022-01-01 6 banana {42: 2, 29: 26} {42: 15, 29: 54} {42: 61, 29: 58}
# 0 2022-01-01 1 pear {0: 39, 2: 55, 12: 25, 9: 98, 8: 14, 30: 82, 48: 59, 21: 77, 45: 8, 40: 75, 24: 19, 13: 92, 28: 39, 57: 63, 56: 95, 34: 77, 39: 77, 41: 41, 43: 14} {0: 54, 2: 11, 12: 63, 9: 12, 8: 38, 30: 34, 48: 96, 21: 27, 45: 19, 40: 87, 24: 83, 13: 28, 28: 22, 57: 25, 56: 38, 34: 66, 39: 80, 41: 80, 43: 49} {0: 98, 2: 71, 12: 97, 9: 54, 8: 70, 30: 22, 48: 31, 21: 4, 45: 47, 40: 42, 24: 28, 13: 68, 28: 65, 57: 73, 56: 32, 34: 1, 39: 73, 41: 39, 43: 37}
# 0 2022-01-01 2 pear {12: 93, 39: 57, 42: 12, 20: 98, 45: 69, 28: 60, 55: 77, 44: 16, 0: 96, 2: 16, 36: 76, 24: 15} {12: 23, 39: 63, 42: 59, 20: 15, 45: 66, 28: 50, 55: 18, 44: 87, 0: 33, 2: 15, 36: 9, 24: 90} {12: 80, 39: 50, 42: 98, 20: 12, 45: 54, 28: 90, 55: 67, 44: 37, 0: 86, 2: 2, 36: 51, 24: 64}
# 0 2022-01-01 3 pear {15: 9, 8: 71, 24: 3, 0: 94, 30: 53, 12: 90, 55: 28, 56: 92} {15: 5, 8: 85, 24: 77, 0: 53, 30: 26, 12: 62, 55: 2, 56: 98} {15: 99, 8: 4, 24: 14, 0: 86, 30: 50, 12: 17, 55: 70, 56: 73}
# 0 2022-01-01 5 pear {3: 57, 56: 2, 0: 65} {3: 32, 56: 16, 0: 90} {3: 88, 56: 74, 0: 80}
# 0 2022-01-01 4 pear {52: 23, 0: 32, 4: 92} {52: 4, 0: 93, 4: 39} {52: 30, 0: 97, 4: 10}
# 0 2022-01-01 6 pear {42: 43, 29: 52} {42: 63, 29: 61} {42: 80, 29: 65}
Solution 1:
IIUC
cols_to_dict = ['value1', 'value2', 'value3']
out = df.assign(date=df['dt'].dt.date, hour=df['dt'].dt.hour, minute=df['dt'].dt.minute) \
.set_index('minute').groupby(['date', 'hour', 'fruit'])[cols_to_dict] \
.agg(dict).sort_index(level=['date', 'fruit', 'hour']).reset_index()
Output:
>>> out
date hour fruit value1 value2 value3
0 2022-01-01 0 apple {0: 67, 9: 82, 6: 33, 21: 74, 16: 99, 20: 82, ... {0: 46, 9: 47, 6: 57, 21: 21, 16: 8, 20: 96, 1... {0: 25, 9: 42, 6: 1, 21: 99, 16: 63, 20: 47, 1...
1 2022-01-01 1 apple {3: 30, 0: 36, 5: 81, 24: [51, 57, 59], 25: 77... {3: 40, 0: 32, 5: 14, 24: [18, 49, 58], 25: 60... {3: 83, 0: 80, 5: 17, 24: [37, 24, 34], 25: 16...
2 2022-01-01 2 apple {0: 49, 24: [64, 26], 40: 6, 18: 48, 4: 11, 16... {0: 77, 24: [53, 77], 40: 80, 18: 11, 4: 76, 1... {0: 0, 24: [62, 75], 40: 44, 18: 13, 4: 56, 16...
3 2022-01-01 3 apple {20: [88, 88], 28: 94, 54: 48, 25: 34} {20: [68, 34], 28: 55, 54: 15, 25: 46} {20: [16, 49], 28: 50, 54: 11, 25: 26}
4 2022-01-01 4 apple {36: 91} {36: 86} {36: 95}
5 2022-01-01 5 apple {33: 21, 36: 70, 8: 89} {33: 34, 36: 66, 8: 69} {33: 39, 36: 38, 8: 92}
6 2022-01-01 6 apple {45: 87} {45: 80} {45: 24}
7 2022-01-01 0 banana {0: 67, 9: 67, 6: 64, 21: 80, 16: 29, 20: 85, ... {0: 30, 9: 33, 6: 4, 21: 64, 16: 29, 20: 59, 1... {0: 92, 9: 37, 6: 93, 21: 40, 16: 49, 20: 97, ...
8 2022-01-01 1 banana {3: 12, 0: 73, 5: 6, 24: [19, 81, 44], 25: 11,... {3: 41, 0: 22, 5: 86, 24: [68, 41, 7], 25: 25,... {3: 94, 0: 52, 5: 15, 24: [19, 59, 12], 25: 2,...
9 2022-01-01 2 banana {0: 49, 24: [36, 87], 40: 35, 18: 26, 4: 21, 1... {0: 96, 24: [30, 45], 40: 86, 18: 33, 4: 2, 16... {0: 53, 24: [39, 22], 40: 84, 18: 7, 4: 47, 16...
10 2022-01-01 3 banana {20: [45, 27], 28: 58, 54: 96, 25: 90} {20: [14, 22], 28: 49, 54: 14, 25: 90} {20: [94, 50], 28: 19, 54: 26, 25: 65}
11 2022-01-01 4 banana {36: 64} {36: 62} {36: 43}
12 2022-01-01 5 banana {33: 83, 36: 4, 8: 34} {33: 13, 36: 36, 8: 37} {33: 2, 36: 63, 8: 94}
13 2022-01-01 6 banana {45: 22} {45: 2} {45: 0}
14 2022-01-01 0 pear {0: 55, 9: 29, 6: 30, 21: 57, 16: 37, 20: 63, ... {0: 74, 9: 38, 6: 18, 21: 47, 16: 47, 20: 34, ... {0: 25, 9: 75, 6: 36, 21: 60, 16: 94, 20: 68, ...
15 2022-01-01 1 pear {3: 20, 0: 22, 5: 1, 24: [94, 27, 44], 25: 73,... {3: 59, 0: 50, 5: 7, 24: [34, 15, 28], 25: 24,... {3: 90, 0: 71, 5: 75, 24: [4, 4, 63], 25: 73, ...
16 2022-01-01 2 pear {0: 42, 24: [83, 98], 40: 83, 18: 34, 4: 58, 1... {0: 55, 24: [13, 50], 40: 39, 18: 37, 4: 68, 1... {0: 69, 24: [80, 49], 40: 80, 18: 82, 4: 13, 1...
17 2022-01-01 3 pear {20: [33, 50], 28: 47, 54: 16, 25: 0} {20: [28, 26], 28: 74, 54: 66, 25: 13} {20: [40, 67], 28: 88, 54: 96, 25: 4}
18 2022-01-01 4 pear {36: 46} {36: 28} {36: 97}
19 2022-01-01 5 pear {33: 27, 36: 23, 8: 57} {33: 71, 36: 82, 8: 57} {33: 23, 36: 88, 8: 91}
20 2022-01-01 6 pear {45: 83} {45: 45} {45: 3}