#!/usr/bin/env python # coding: utf-8 # # wetterdienst - A simple example # # pip install wetterdienst # ## Import modules necessary for general functioning # In[1]: import warnings warnings.filterwarnings("ignore") from wetterdienst.dwd.observations import DWDObservationMetadata, DWDObservationStations, \ DWDObservationData, DWDObservationPeriod, DWDObservationResolution, \ DWDObservationParameter, DWDObservationParameterSet get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib as mpl import matplotlib.pyplot as plt from matplotlib import cm # Which parameters are available? # In[2]: # all print("All available combinations") print( DWDObservationMetadata().discover_parameters() ) # selection print("Selection of daily historical data") print( DWDObservationMetadata( resolution=DWDObservationResolution.DAILY, period=DWDObservationPeriod.HISTORICAL ).discover_parameters() ) # ## 1. First check the metadata to inform yourself of available stations # (here we pick historical daily precipitation - hdp) # In[3]: sites_hdp = DWDObservationStations( parameter_set=DWDObservationParameterSet.PRECIPITATION_MORE, resolution=DWDObservationResolution.DAILY, period=DWDObservationPeriod.HISTORICAL ) print("Number of stations with available data: ", sites_hdp.all().sum()) print("Some of the stations:") sites_hdp.all().head() # The metadata includes an id, the range of the measurements, the position # (including height) as well as place and state of it and if it has a file. With the # following plot we want to show a map of those stations: # In[4]: cmap = cm.get_cmap('viridis') bounds = sites_hdp.all().STATION_HEIGHT.quantile([0, 0.25, 0.5, 0.75, 1]).values norm = mpl.colors.BoundaryNorm(bounds, cmap.N) fig, ax = plt.subplots(figsize=(10, 10)) plot = sites_hdp.all().plot.scatter( x="LON", y="LAT", c="STATION_HEIGHT", cmap=cmap, norm=norm, ax=ax) plot.set_title("Map of daily precipitation stations in Germany\n" "Color refers to height of station") plt.show() # ## 2. The usual way of retrieving data # Usually there are three steps to follow: # - select indexed files based on # - its station_id # - "1048" for Dresden, Germany # - its parameter # - "kl" for climate # - its time_resolution # - "daily" for daily data # - its period_type # - "historical" for data up to the end of the last year # - download the resulting list of files # - parse it into pandas.DataFrames # # We have summarized those steps into one: # - collect_dwd_data # # Let's try it out for the above selection: # In[5]: print("Receiving historical daily climate data for Dresden-Klotzsche (1048)") station_data = DWDObservationData( station_ids=[1048], parameters=[DWDObservationParameterSet.CLIMATE_SUMMARY], resolution=DWDObservationResolution.DAILY, periods=[DWDObservationPeriod.HISTORICAL], tidy_data=False ).collect_safe() station_data.dropna(axis=0).head() # See that DATE is already parsed, so we can easily get some nice graphs with matplotlib, # which we will do in the next part. We may as well also use the "tidy_data" option to # receive a nicer dataframe and "humanize_column_names" to get meaningful parameters: # In[6]: print("Receiving historical daily climate data for Dresden-Klotzsche (1048), this time tidied.") station_data = DWDObservationData( station_ids=[1048], parameters=[DWDObservationParameterSet.CLIMATE_SUMMARY], resolution=DWDObservationResolution.DAILY, periods=[DWDObservationPeriod.HISTORICAL], tidy_data=True, humanize_parameters=True ).collect_safe() station_data.dropna(axis=0).groupby("PARAMETER").first() # With the latest update we can now also request direct parameters from the given sets. # We know that climate_summary contains TMK, TNK, TXK and RSK, so let's request those. # This option will automatically tidy the data. # In[7]: print("Receiving historical daily temperature and precipitation for Dresden-Klotzsche " "(1048).") station_data = DWDObservationData( station_ids=[1048], parameters=[DWDObservationParameter.DAILY.TEMPERATURE_AIR_200, DWDObservationParameter.DAILY.TEMPERATURE_AIR_MAX_200, DWDObservationParameter.DAILY.TEMPERATURE_AIR_MIN_200, DWDObservationParameter.DAILY.PRECIPITATION_HEIGHT], resolution=DWDObservationResolution.DAILY, periods=[DWDObservationPeriod.HISTORICAL], humanize_parameters=False ).collect_safe() station_data.dropna(axis=0).head() # ## 3. Let's create some plots # # Now that we have data, let's create some plots! We can create a time series/histogram of # the temperatures and precipitation # In[8]: cmap = plt.get_cmap('viridis', 4) colors = cmap.colors PARAMETERS = ["TNK", "TMK", "TXK", "RSK"] station_data_grouped = station_data.groupby(station_data["PARAMETER"], observed=True) fig, axes = plt.subplots(nrows=len(PARAMETERS), tight_layout=True, figsize=(10, 40)) for (parameter, group), ax, color in zip(station_data_grouped, axes, colors): group.plot(x="DATE", y="VALUE", label=parameter, alpha=.75, ax=ax, c=color) plt.tight_layout() plt.subplots_adjust(top=0.9) plt.suptitle("Temperature and precipitation time series of Dresden, Germany") plt.show() # ## 4. Create yearly values # In[9]: import numpy as np import pandas as pd station_data_yearly = [] for (year, parameter), group in station_data.groupby( [station_data["DATE"].dt.year, "PARAMETER"], as_index=False, observed=True): if parameter == "RSK": station_data_yearly.append(group.dropna().agg({"VALUE": np.sum})) else: station_data_yearly.append(group.dropna().agg({"VALUE": np.mean})) station_data_yearly = pd.concat(station_data_yearly) station_data_yearly # ## 5. Find a station # # We may want to find a station near to a certain area. Therefor simply call get_nearest_station # In[10]: DWDObservationStations( parameter_set=DWDObservationParameterSet.CLIMATE_SUMMARY, resolution=DWDObservationResolution.DAILY, period=DWDObservationPeriod.HISTORICAL, start_date="2000-01-01", end_date="2010-01-01" ).nearby_number( 51.05089, 13.73832, 5 )