#!/usr/bin/env python
# coding: utf-8

# # wetterdienst - A simple example
#
# pip install wetterdienst

# ## Import modules necessary for general functioning

# In[1]:

from wetterdienst import metadata_for_dwd_data, collect_dwd_data, get_nearest_station
from wetterdienst.enumerations.period_type_enumeration import PeriodType
from wetterdienst.enumerations.time_resolution_enumeration import TimeResolution
from wetterdienst.enumerations.parameter_enumeration import Parameter

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import cm

try:
    # `get_ipython` only exists inside IPython/Jupyter; when this export is
    # run as a plain script the inline backend is simply not needed.
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass


# ## 1. First check the metadata to inform yourself of available stations
# (here we pick historical daily precipitation - hdp)

# In[2]:

metadata_hdp = metadata_for_dwd_data(
    Parameter.PRECIPITATION_MORE, TimeResolution.DAILY, PeriodType.HISTORICAL)

print("Number of stations with available data: ", metadata_hdp["HAS_FILE"].sum())
print("Some of the stations:")
metadata_hdp.head()


# The metadata includes an id, the range of the measurements, the position
# (including height) as well as place and state of it and if it has a file.
# With the following plot we want to show a map of those stations:

# In[3]:

# plt.get_cmap is used instead of cm.get_cmap: the latter was deprecated in
# matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap('viridis')
# Colour classes are the quartiles of the station heights.
bounds = metadata_hdp.STATIONHEIGHT.quantile([0, 0.25, 0.5, 0.75, 1]).values
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)

fig, ax = plt.subplots(figsize=(10, 10))
plot = metadata_hdp.plot.scatter(
    x="LON", y="LAT", c="STATIONHEIGHT", cmap=cmap, norm=norm, ax=ax)
plot.set_title("Map of daily precipitation stations in Germany\n"
               "Color refers to height of station")
plt.show()


# ## 2. The usual way of retrieving data
# Usually there are three steps to follow:
# - select indexed files based on
#     - its station_id
#         - "1048" for Dresden, Germany
#     - its parameter
#         - "kl" for climate
#     - its time_resolution
#         - "daily" for daily data
#     - its period_type
#         - "historical" for data up to the end of the last year
# - download the resulting list of files
# - parse it into pandas.DataFrames
#
# We have summarized those steps into one:
# - collect_dwd_data
#
# Let's try it out for the above selection:

# In[4]:

print("Receiving historical daily climate data for Dresden-Klotzsche (1048)")
station_data = collect_dwd_data(
    [1048], Parameter.CLIMATE_SUMMARY, TimeResolution.DAILY, PeriodType.HISTORICAL)

station_data


# See that DATE is already parsed, so we can easily get some nice graphs with
# matplotlib, which we will do in the next part.

# ## 3. Let's create some plots
# First to handle the data easier, we want it to be transformed from tabular
# to column data, which means instead of having the data in several columns
# with an additional date column, we want three columns, where a set of all
# three defines the date of measured data, element and the exact value.
# To make it clear look at the following table, which is already transformed.

# In[5]:

id_vars = ["STATION_ID", "DATE"]
value_vars = [var for var in station_data.columns if var not in id_vars]

station_data_transformed = station_data.melt(
    id_vars=id_vars, value_vars=value_vars, var_name="ELEMENT", value_name="VALUE")

station_data_transformed.head()


# We can create a time series/histogram of some elements to compare the
# distribution of the values, here for example precipitation and mean
# temperature:

# In[6]:

elements_to_plot = ["RSK", "TMK"]

station_data_filtered = station_data_transformed.loc[
    station_data_transformed["ELEMENT"].isin(elements_to_plot)
].sort_values(["STATION_ID", "ELEMENT", "DATE"])

# Select the columns with a list: tuple-style selection on a GroupBy
# (`[...]["DATE", "VALUE"]`) was deprecated in pandas 1.0 and raises in 2.x.
station_data_grouped = station_data_filtered.groupby("ELEMENT")[["DATE", "VALUE"]]


# In[7]:

cmap = plt.get_cmap('viridis')
# One colour per plotted element.
colors = cmap(np.linspace(0, 1, len(elements_to_plot)))

# Two rows (time series on top, histogram below) and one column per element.
# squeeze=False keeps `axes` two-dimensional even for a single element so
# that iterating over `axes.T` always yields an (upper, lower) pair.
fig, axes = plt.subplots(
    2, len(elements_to_plot), figsize=(10, 10), squeeze=False)

for (k, v), (ax1, ax2), color in zip(station_data_grouped, axes.T, colors):
    v.plot(x="DATE", y="VALUE", label=k, alpha=.75, ax=ax1, c=color)
    v.plot(y="VALUE", kind="hist", label=k, alpha=.75, ax=ax2)

plt.tight_layout()
plt.subplots_adjust(top=0.9)
plt.suptitle("Precipitation/Mean temperature time series of Dresden, Germany")

plt.show()


# We can see here that the precipitation is strongly right-skewed and not
# normally distributed, while the temperature is almost normally distributed!
# Also the time series gives a glimpse of how much data is available!
# A sad detail here is the gap around WW2.

# ## 4. Create yearly values

# In[8]:

for parameter, group in station_data_grouped:
    yearly = group.groupby(group["DATE"].dt.year)["VALUE"]
    if parameter == "RSK":
        # Precipitation is accumulated over the year ...
        print(yearly.sum())
    else:
        # ... every other element is averaged.
        print(yearly.mean())


# ## 5. Find a station
#
# We may want to find a station near to a certain area.
# Therefore simply call get_nearest_station

# In[9]:

get_nearest_station(
    [51.05089], [13.73832],
    Parameter.CLIMATE_SUMMARY, TimeResolution.DAILY, PeriodType.HISTORICAL)