<- "https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/subdaily/cloudiness/historical/"
url <- "N_Terminwerte_Beschreibung_Stationen.txt"
stations_file if (!file.exists("data")) {
dir.create("data")
}if (!file.exists(paste0("data/", stations_file))) {
download.file(paste0(url, stations_file), here(
"data", stations_file
mode = "wb")
), }
This is an example of how to create an animated visualization of spatial data using R. The data is sourced from the German Weather Service (Deutscher Wetterdienst, DWD) and includes cloud coverage and density observations from various weather stations across Germany.
Load the stations data
Here we will download the stations data from the DWD website. The data contains information about weather stations, including their IDs, names, locations, and the time period they were active.
Let’s read the stations data.
col_names <- c(
  "STATIONS_ID", "von_datum", "bis_datum", "Stationshoehe", "lat",
  "lon", "Stationsname", "Bundesland", "Abgabe"
)

stations <- read.fwf(
  here(
    "posts", "20250704-animation", "data",
    "N_Terminwerte_Beschreibung_Stationen.txt"
  ),
  widths = c(6, 9, 9, 15, 12, 10, 41, 41, 5), skip = 2,
  fileEncoding = "Windows-1252", col.names = col_names
) |> as.data.table()

stations[, von_datum := as.Date(str_trim(von_datum), format = "%Y%m%d")]
stations[, bis_datum := as.Date(str_trim(bis_datum), format = "%Y%m%d")]
stations[, lon := as.numeric(lon)]
stations[, lat := as.numeric(lat)]
Read the links to the data files
We will read the HTML content of the DWD website to extract the links to the cloudiness data files. The links will be filtered to include only those that contain the term “terminwerte”.
page_content <- read_html(url)

links <- page_content |>
  html_nodes("a") |>
  html_attr("href")

links <- links[!is.na(links) & str_detect(links, "terminwerte")]
links <- links |> data.table()
links |> head()
links
<char>
1: terminwerte_N_00001_19370101_19860630_hist.zip
2: terminwerte_N_00003_18910101_20110331_hist.zip
3: terminwerte_N_00044_19710301_20111231_hist.zip
4: terminwerte_N_00052_19730101_20011231_hist.zip
5: terminwerte_N_00061_19750701_19780831_hist.zip
6: terminwerte_N_00070_19730601_19860930_hist.zip
Extract the station IDs from the links. The station IDs are 5-digit numbers that are part of the file names.
links[, STATIONS_ID := str_extract(links, "[0-9]{5}")]
links[, STATIONS_ID := as.integer(STATIONS_ID)]
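str_extract() returns only the first match, and the station ID is the first run of five consecutive digits in each file name, so the longer date stamps that follow are never picked up:
str_extract("terminwerte_N_00001_19370101_19860630_hist.zip", "[0-9]{5}")
[1] "00001"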
Filter the stations data
We will keep only those stations that were active throughout the period of interest, i.e. whose records start on or before 2023-12-01 and end on or after 2025-01-01.
stations <- stations[von_datum <= "2023-12-01" & bis_datum >= "2025-01-01"]
links <- links[stations, on = "STATIONS_ID"]
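The join uses data.table’s X[Y, on = ...] semantics: the result keeps one row per row of Y, so links ends up with exactly the rows whose STATIONS_ID appears in the filtered stations table. A toy sketch of those semantics:
X <- data.table(id = 1:2, x = c("a", "b"))
Y <- data.table(id = 2:3, y = c("p", "q"))
X[Y, on = "id"] # rows for id 2 and 3; x is NA where X has no match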
Download and process the data files
In this section, we will download the data files from the DWD website and process them to extract the cloud coverage and density observations. The data will be stored in a DuckDB database, which lets us reuse it later without downloading and parsing the files again.
# check if the files exist
files <- list.files("data", full.names = TRUE)
files <- files[str_detect(files, "produkt_n_termin")]

if (length(files) == 0) {
  for (link in links$links) {
    download.file(paste0(url, link), here(
      "posts", "20250704-animation", "data", link
    ), mode = "wb")
    unzip(paste0("data/", link), exdir = "data")
    unlink("data/Metadaten*")
    unlink("data/*.html")
  }
}
unlink("data/*.zip")
Here is where the parsing is done: each product file is read, trimmed to the start date, and the combined table is cached in DuckDB.
if (!file.exists("db")) {
  dir.create("db")
}

con <- dbConnect(duckdb(),
  dbdir = here("posts", "20250704-animation", "db", "weather.duckdb")
)
tables <- dbGetQuery(con, "SHOW ALL TABLES;")["name"]
start_date <- "2023-12-01"

if (!("cloudiness" %in% tables$name)) {
  files <- list.files("data", full.names = TRUE)
  files <- files[str_detect(files, "produkt_n_termin")]

  observations <- data.table()

  for (file in files) {
    temp_data <- read.csv(file, sep = ";") |> as.data.table()
    temp_data[, MESS_DATUM := as.Date(str_trim(MESS_DATUM), format = "%Y%m%d")]
    temp_data <- temp_data[MESS_DATUM >= start_date]
    observations <- rbind(
      observations,
      temp_data
    )
  }

  # -999 marks missing values in the DWD files
  observations <- observations[N_TER != -999]
  observations[, CD_TER := ifelse(CD_TER == -999, NA, CD_TER)]

  dbWriteTable(con, "cloudiness", observations, overwrite = TRUE)
}

observations <- dbGetQuery(
  con,
  sprintf("SELECT * FROM cloudiness WHERE MESS_DATUM >= '%s'", start_date)
) |>
  as.data.table()
dbDisconnect(con)

observations |> glimpse()
Rows: 231,108
Columns: 6
$ STATIONS_ID <int> 4024, 4024, 4024, 4024, 4024, 4024, 4024, 4024, 4024, 4024…
$ MESS_DATUM <date> 2023-12-01, 2023-12-01, 2023-12-01, 2023-12-02, 2023-12-0…
$ QN_4 <int> 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9…
$ N_TER <int> 4, 5, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 8, 8…
$ CD_TER <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ eor <chr> "eor", "eor", "eor", "eor", "eor", "eor", "eor", "eor", "e…
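Because the observations now live in the weather.duckdb file, a later session can skip the download and parsing entirely. A minimal sketch of that reuse, assuming the same database path and the cloudiness table created above:
library(DBI)
library(duckdb)
library(data.table)

# reopen the cached database; no download or parsing needed
con <- dbConnect(duckdb(),
  dbdir = here("posts", "20250704-animation", "db", "weather.duckdb")
)
cloudiness <- dbGetQuery(con, "SELECT * FROM cloudiness") |> as.data.table()
dbDisconnect(con)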
As the data contains multiple observations per day for each station, we will aggregate the data to get the average cloud coverage and density per day per station.
observations <- observations[, .(
  .N,
  cloud_coverage = mean(N_TER, na.rm = TRUE),
  cloud_density = mean(CD_TER, na.rm = TRUE)
), by = c("STATIONS_ID", "MESS_DATUM")]
Let’s plot the cloud coverage for a specific station as a time series to visualize the data.
> "2024-01-01" & STATIONS_ID == 433] |>
observations[MESS_DATUM ggplot(aes(x = MESS_DATUM, y = cloud_coverage)) +
geom_line(color = "gray") +
geom_line(aes(y = rollapply(cloud_coverage,
width = 7, FUN = mean, align = "center", partial = TRUE
color = "blue") +
)), labs(title = NULL, x = NULL, y = "Cloud Coverage") +
theme_gray()
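The blue line is a centered 7-day rolling mean from zoo::rollapply(); partial = TRUE shrinks the window at the ends of the series instead of producing NA there, as a toy vector shows:
rollapply(1:5, width = 3, FUN = mean, align = "center", partial = TRUE)
[1] 1.5 2.0 3.0 4.0 4.5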
Add H3 addresses
To visualize the data on a map, we will convert the latitude and longitude coordinates of the stations into H3 addresses. H3 is a geospatial indexing system that allows us to represent geographic locations as hexagonal cells.
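Before converting all stations at once, here is what the conversion looks like for a single coordinate, a sketch using the same h3jsr and sf functions as below. At resolution 4, cells average roughly 1,800 km², coarse enough to pool nearby stations into one hexagon:
# a single point (Berlin, WGS84) mapped to its resolution-4 cell
berlin <- st_sfc(st_point(c(13.405, 52.52)), crs = 4326)
point_to_cell(berlin, res = 4) # returns the H3 cell index as a string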
points <- stations[, .(lon, lat)] |> unique()

points[, h3_address := point_to_cell(points, res = 4)]
Assuming columns 1 and 2 contain x, y coordinates in EPSG:4326
stations <- stations[, c(
  "STATIONS_ID", "von_datum", "bis_datum", "Stationshoehe",
  "lat", "lon", "Stationsname", "Bundesland", "Abgabe"
)]

# attach each station's H3 address, then the hexagon geometry for that address
stations[points, on = .(lon, lat), h3_address := h3_address]
stations <- stations[, geometry := cell_to_polygon(h3_address, simple = FALSE)[2]]
Load the boundaries of Germany to use as a background for the map.
<- geoboundaries("Germany", release_type = "gbOpen", adm_lvl = "adm1") boundaries
Join the stations data with the observations so that each observation carries its station’s geometry. Where several stations fall into the same H3 cell, average their cloud coverage per cell and day, then drop the duplicate rows.
observations <- observations[stations, on = "STATIONS_ID"]
observations[, cloud_coverage := mean(cloud_coverage, na.rm = TRUE),
  by = .(h3_address, MESS_DATUM)
]
observations <- observations |> unique(by = c("h3_address", "MESS_DATUM"))
Create animations of cloud coverage in Germany
In the following section, we will create animations of cloud coverage in Germany using the observations from 2024. The code creates a series of maps showing the average cloud coverage for each day of 2024, smoothed with a 7-day rolling average.
min_date <- observations[, MESS_DATUM] |> min(na.rm = TRUE)
max_date <- observations[, MESS_DATUM] |> max(na.rm = TRUE)

min_date <- max(c(min_date, as.Date("2024-01-01")))
max_date <- min(c(max_date, as.Date("2024-12-31")))

max_coverage <- observations[(MESS_DATUM >= min_date) &
  (MESS_DATUM <= max_date), cloud_coverage] |> max(na.rm = TRUE)
min_coverage <- observations[(MESS_DATUM >= min_date) &
  (MESS_DATUM <= max_date), cloud_coverage] |> min(na.rm = TRUE)

table_dates <- seq(min_date - 7, max_date + 7, by = 1) |>
  as.data.table() |>
  rename(MESS_DATUM = V1)
observations <- table_dates[observations, on = "MESS_DATUM"]

observations[, cloud_coverage_r7 := rollapply(cloud_coverage,
  width = 7, FUN = mean, align = "center", partial = TRUE
),
by = STATIONS_ID
]

dates <- seq(min_date, max_date, by = 1)

if (!file.exists("figures")) {
  dir.create("figures")
}
for (d in as.character(dates)) {
  p <- ggplot(
    data = observations[MESS_DATUM == d] |> as.data.frame(),
    aes(fill = cloud_coverage_r7)
  ) +
    geom_sf(data = boundaries, fill = "gray78", color = "gray54") +
    geom_sf(aes(geometry = geometry), color = "gray78") +
    scale_fill_whitebox_c(
      palette = "deep",
      direction = 1,
      limits = c(min_coverage, max_coverage)
    ) +
    coord_sf(default_crs = sf::st_crs(4326)) +
    theme_void() +
    theme(
      legend.position = "bottom",
      legend.key.height = unit(4, "pt"),
      legend.key.width = unit(20, "pt"),
      legend.title.position = "top",
      panel.grid.minor = element_blank(),
      panel.grid.major = element_blank(),
      plot.background = element_rect(fill = NA, color = NA),
      title = element_text(size = 8, color = "gray35")
    ) +
    labs(title = d, fill = "Cloud Coverage")

  ggsave(
    here(
      "posts", "20250704-animation", "figures",
      paste0("cloudiness-", d, ".png")
    ),
    plot = p,
    units = "px",
    width = 1200,
    height = 1200,
    dpi = 300
  )
}
Create a GIF animation
Finally, we will create a GIF animation from the generated PNG files. The GIF shows the cloud coverage in Germany over the course of 2024, with each frame representing one day; at a delay of 0.1 seconds per frame, the full year plays back in about 37 seconds.
png_files <- list.files(
  here("posts", "20250704-animation", "figures"),
  full.names = TRUE, pattern = "cloudiness.+\\.png"
) |>
  sort() |>
  as.character()

gif_file <- here(
  "posts", "20250704-animation", "animation-cloudiness.gif"
)

gifski(png_files, gif_file, width = 1200, height = 1200, delay = 0.1, loop = TRUE)
[1] "/mnt/Projects/Blog/posts/20250704-animation/animation-cloudiness.gif"
unlink(here("posts", "20250704-animation", "figures", "cloudiness*"))
The resulting GIF animation shows the cloud coverage in Germany for each day in 2024, with a rolling average of 7 days to smooth the data. The animation provides a clear visual representation of how cloud coverage changed over time across different regions in Germany.
Source code available in the repository: Animations-in-R