You may try this. Please note that, to keep the example small, I select data only for days 1-4 and hours 0-1 of each month. Days 1 & 2 of each month have data on occurrence, and days 3 & 4 are missing data for occurrence.
library(dplyr)
# Build a small example data set ----
set.seed(123) # fixed seed so the sampled values can be reproduced
d1 <- data.frame(
  time = seq(
    from = as.POSIXct("2000-01-01"),
    to = as.POSIXct("2000-02-28"),
    by = "hour"
  )
) %>%
  mutate(
    hour = as.integer(format(time, "%H")),
    day = as.integer(format(time, "%d")), # helper column, dropped again below
    month = as.integer(format(time, "%m")),
    # draw one value per hourly row, then keep it only on day 1-2 of a month;
    # every other day gets NA (the gaps to be filled later)
    occurence = ifelse(
      day %in% 1:2,
      sample(1:10, length(time), replace = TRUE),
      NA
    )
  ) %>%
  # shrink the example: hours 0-1 and days 1-4 only
  filter(hour %in% 0:1, day %in% 1:4) %>%
  select(-day)
# calculate mean occurrence per month and hour
# (NA values are skipped, so each mean uses only the rows that have data;
# the result is rounded to one decimal)
d2 <- d1 %>%
  group_by(month, hour) %>%
  summarise(mean_occ = round(mean(occurence, na.rm = TRUE), 1)) %>%
  # summarise() removes only the last grouping level (hour); drop the
  # remaining `month` grouping so d2 is a plain, ungrouped lookup table
  ungroup()
d2
# Output of the original run (exact values depend on the random sample above):
# month hour mean_occ
# 1 1 0 5.0
# 2 1 1 8.0
# 3 2 0 5.5
# 4 2 1 6.5
# Fill the gaps: attach the month/hour mean to every row, then build
# occurence2 = the observed value where present, otherwise that mean
d3 <- left_join(d1, d2, by = c("hour", "month")) %>%
  mutate(
    occurence2 = ifelse(!is.na(occurence), occurence, mean_occ)
  ) %>%
  select(-month, -mean_occ) # helper columns are no longer needed
d3
# Output of the original run:
# hour time occurence occurence2
# 1 0 2000-01-01 00:00:00 3 3.0
# 2 1 2000-01-01 01:00:00 8 8.0
# 3 0 2000-01-02 00:00:00 7 7.0
# 4 1 2000-01-02 01:00:00 8 8.0
# 5 0 2000-01-03 00:00:00 NA 5.0
# 6 1 2000-01-03 01:00:00 NA 8.0
# 7 0 2000-01-04 00:00:00 NA 5.0
# 8 1 2000-01-04 01:00:00 NA 8.0
# 9 0 2000-02-01 00:00:00 4 4.0
# 10 1 2000-02-01 01:00:00 6 6.0
# 11 0 2000-02-02 00:00:00 7 7.0
# 12 1 2000-02-02 01:00:00 7 7.0
# 13 0 2000-02-03 00:00:00 NA 5.5
# 14 1 2000-02-03 01:00:00 NA 6.5
# 15 0 2000-02-04 00:00:00 NA 5.5
# 16 1 2000-02-04 01:00:00 NA 6.5
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…