Averages per time-intervals

dataframe r

I would like to calculate average prices based on different time periods. IThe periods are: a.) 00:00-04:30 b.) 16:00-20:00 and c.) The rest of time-steps. One way to do this is to subset multiple times my data but is it an easier way?

The outcome should be a table with

Date     00:30-04:30  16:00-20:00 Other
2021-11

Sample data:

df<-structure(list(Date = structure(c(1635724800, 1635726600, 1635728400, 
1635730200, 1635732000, 1635733800, 1635735600, 1635737400, 1635739200, 
1635741000, 1635742800, 1635744600, 1635746400, 1635748200, 1635750000, 
1635751800, 1635753600, 1635755400, 1635757200, 1635759000, 1635760800, 
1635762600, 1635764400, 1635766200, 1635768000, 1635769800, 1635771600, 
1635773400, 1635775200, 1635777000, 1635778800, 1635780600, 1635782400, 
1635784200, 1635786000, 1635787800, 1635789600, 1635791400, 1635793200, 
1635795000, 1635796800, 1635798600, 1635800400, 1635802200, 1635804000, 
1635805800, 1635807600, 1635809400), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), Time = structure(c(0, 1800, 3600, 5400, 7200, 
9000, 10800, 12600, 14400, 16200, 18000, 19800, 21600, 23400, 
25200, 27000, 28800, 30600, 32400, 34200, 36000, 37800, 39600, 
41400, 43200, 45000, 46800, 48600, 50400, 52200, 54000, 55800, 
57600, 59400, 61200, 63000, 64800, 66600, 68400, 70200, 72000, 
73800, 75600, 77400, 79200, 81000, 82800, 84600), class = c("hms", 
"difftime"), units = "secs"), Band = c("C", "C", "C", "C", "C", 
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", 
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", 
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", 
"C", "C", "C", "C"), Region = c("Roma", "Roma", "Roma", "Roma", 
"Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma", 
"Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma", 
"Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma", 
"Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma", 
"Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma", "Roma", 
"Roma", "Roma", "Roma", "Roma"), Price = c(3.83, 4.91, 2.97, 
1.3, 2.25, 1.3, 1.89, 1.3, 2.05, 2.35, 2.16, 5.2, 9, 9.38, 9.38, 
11.84, 13.65, 14.3, 12.42, 11.79, 11.78, 9.38, 11.37, 12.6, 11.33, 
12.13, 13.3, 12.4, 14.37, 14.5, 11.6, 14.01, 16.64, 22.46, 24.35, 
24.97, 25.21, 23.57, 18.68, 13.84, 16.65, 12.7, 14.22, 9.66, 
12.45, 8.41, 12.7, 12.46)), spec = structure(list(cols = list(
    Date = structure(list(format = ""), class = c("collector_datetime", 
    "collector")), Time = structure(list(format = ""), class = c("collector_time", 
    "collector")), Band = structure(list(), class = c("collector_character", 
    "collector")), Region = structure(list(), class = c("collector_character", 
    "collector")), Price = structure(list(), class = c("collector_double", 
    "collector"))), default = structure(list(), class = c("collector_guess", 
"collector")), delim = ","), class = "col_spec"), problems = <pointer: 0x03904908>, row.names = c(NA, 
-48L), class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"
))

Solution 1:

Here's another option. Idea is to first convert your time variable into seconds (starting at 00:00:00) and then create a grouping variable based on the seconds passed since 00:00:00.

library(tidyverse)
library(lubridate)
df %>%
  mutate(period = period_to_seconds(as.period(Time)),
         time_group = case_when(period >= 1*30*60 & period <= 9*30*60 ~ "00:30 - 04:30",
                                period >= 32*30*60 & period <= 40*30*60 ~ "16:00 - 20:00",
                                TRUE ~ "other")) %>%
  group_by(time_group) %>%
  summarize(mean_price = mean(Price))

which gives:

# A tibble: 3 x 2
  time_group    mean_price
  <chr>              <dbl>
1 00:30 - 04:30       2.26
2 16:00 - 20:00      20.7 
3 other              11.1

Solution 2:

You might try:

library(dplyr)
# Get times in seconds
four_thirty_am  <- 4.5 * 60 * 60
four_pm  <- 16 * 60 * 60
eight_pm  <- 20 * 60 * 60

df$timePeriod  <- ifelse(
    df$Time <= four_thirty_am, 
    "00:00-04:30", 
    ifelse(
        df$Time >= four_pm & df$Time <= eight_pm, 
        "16:00-20:00", 
        "Other")
    )

df  %>% 
    group_by(timePeriod)  %>% 
    summarise(meanPrice = mean(Price))

Output:

# A tibble: 3 x 2      
  timePeriod  meanPrice
* <chr>           <dbl>
1 00:00-04:30      2.42
2 16:00-20:00     20.7 
3 Other           11.4

Solution 3:

or with lubridate and hms



library(dplyr)
library(lubridate)
library(hms)

df_new<-df %>%
  mutate(time=Time %>% as_hms()) %>%
  mutate(period=case_when(
    time >=("00:30:00" %>% as_hms()) & time <=("04:30:00" %>% as_hms())  ~ "00:30-04:30", 
    time >=("16:00:00" %>% as_hms()) & time <=("20:00:00" %>% as_hms())  ~ "16:00-20:00",   
    TRUE ~ "other"
  )) %>%
  group_by(period)  %>%
 summarise(meanPrice = mean(Price))

df_new
#> # A tibble: 3 × 2
#>   period      meanPrice
#>   <chr>           <dbl>
#> 1 00:30-04:30      2.26
#> 2 16:00-20:00     20.7 
#> 3 other           11.1

^{Created on 2022-01-16 by the reprex package (v2.0.1)}

Averages per time-intervals

Solution 1:

Solution 2:

Solution 3:

Related

Recent Posts