Averages per time-intervals
I would like to calculate average prices based on different time periods. IThe periods are:
a.) 00:00-04:30
b.) 16:00-20:00
and c.) The rest of time-steps. One way to do this is to subset multiple times my data but is it an easier way?
The outcome should be a table with
Date 00:30-04:30 16:00-20:00 Other
2021-11
Sample data:
df<-structure(list(Date = structure(c(1635724800, 1635726600, 1635728400,
1635730200, 1635732000, 1635733800, 1635735600, 1635737400, 1635739200,
1635741000, 1635742800, 1635744600, 1635746400, 1635748200, 1635750000,
1635751800, 1635753600, 1635755400, 1635757200, 1635759000, 1635760800,
1635762600, 1635764400, 1635766200, 1635768000, 1635769800, 1635771600,
1635773400, 1635775200, 1635777000, 1635778800, 1635780600, 1635782400,
1635784200, 1635786000, 1635787800, 1635789600, 1635791400, 1635793200,
1635795000, 1635796800, 1635798600, 1635800400, 1635802200, 1635804000,
1635805800, 1635807600, 1635809400), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), Time = structure(c(0, 1800, 3600, 5400, 7200,
9000, 10800, 12600, 14400, 16200, 18000, 19800, 21600, 23400,
25200, 27000, 28800, 30600, 32400, 34200, 36000, 37800, 39600,
41400, 43200, 45000, 46800, 48600, 50400, 52200, 54000, 55800,
57600, 59400, 61200, 63000, 64800, 66600, 68400, 70200, 72000,
73800, 75600, 77400, 79200, 81000, 82800, 84600), class = c("hms",
"difftime"), units = "secs"), Band = c("C", "C", "C", "C", "C",
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C",
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C",
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C",
"C", "C", "C", "C"), Region = c("Roma", "Roma", "Roma", "Roma",
"Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma",
"Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma",
"Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma",
"Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma",
"Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma",
"Roma", "Roma", "Roma", "Roma"), Price = c(3.83, 4.91, 2.97,
1.3, 2.25, 1.3, 1.89, 1.3, 2.05, 2.35, 2.16, 5.2, 9, 9.38, 9.38,
11.84, 13.65, 14.3, 12.42, 11.79, 11.78, 9.38, 11.37, 12.6, 11.33,
12.13, 13.3, 12.4, 14.37, 14.5, 11.6, 14.01, 16.64, 22.46, 24.35,
24.97, 25.21, 23.57, 18.68, 13.84, 16.65, 12.7, 14.22, 9.66,
12.45, 8.41, 12.7, 12.46)), spec = structure(list(cols = list(
Date = structure(list(format = ""), class = c("collector_datetime",
"collector")), Time = structure(list(format = ""), class = c("collector_time",
"collector")), Band = structure(list(), class = c("collector_character",
"collector")), Region = structure(list(), class = c("collector_character",
"collector")), Price = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), problems = <pointer: 0x03904908>, row.names = c(NA,
-48L), class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"
))
Solution 1:
Here's another option. Idea is to first convert your time variable into seconds (starting at 00:00:00) and then create a grouping variable based on the seconds passed since 00:00:00.
library(tidyverse)
library(lubridate)
df %>%
mutate(period = period_to_seconds(as.period(Time)),
time_group = case_when(period >= 1*30*60 & period <= 9*30*60 ~ "00:30 - 04:30",
period >= 32*30*60 & period <= 40*30*60 ~ "16:00 - 20:00",
TRUE ~ "other")) %>%
group_by(time_group) %>%
summarize(mean_price = mean(Price))
which gives:
# A tibble: 3 x 2
time_group mean_price
<chr> <dbl>
1 00:30 - 04:30 2.26
2 16:00 - 20:00 20.7
3 other 11.1
Solution 2:
You might try:
library(dplyr)
# Get times in seconds
four_thirty_am <- 4.5 * 60 * 60
four_pm <- 16 * 60 * 60
eight_pm <- 20 * 60 * 60
df$timePeriod <- ifelse(
df$Time <= four_thirty_am,
"00:00-04:30",
ifelse(
df$Time >= four_pm & df$Time <= eight_pm,
"16:00-20:00",
"Other")
)
df %>%
group_by(timePeriod) %>%
summarise(meanPrice = mean(Price))
Output:
# A tibble: 3 x 2
timePeriod meanPrice
* <chr> <dbl>
1 00:00-04:30 2.42
2 16:00-20:00 20.7
3 Other 11.4
Solution 3:
or with lubridate
and hms
library(dplyr)
library(lubridate)
library(hms)
df_new<-df %>%
mutate(time=Time %>% as_hms()) %>%
mutate(period=case_when(
time >=("00:30:00" %>% as_hms()) & time <=("04:30:00" %>% as_hms()) ~ "00:30-04:30",
time >=("16:00:00" %>% as_hms()) & time <=("20:00:00" %>% as_hms()) ~ "16:00-20:00",
TRUE ~ "other"
)) %>%
group_by(period) %>%
summarise(meanPrice = mean(Price))
df_new
#> # A tibble: 3 × 2
#> period meanPrice
#> <chr> <dbl>
#> 1 00:30-04:30 2.26
#> 2 16:00-20:00 20.7
#> 3 other 11.1
Created on 2022-01-16 by the reprex package (v2.0.1)