#!/usr/bin/env python
# coding: utf-8

# # wetterdienst - A simple example
# 
# pip install wetterdienst

# ## Import modules necessary for general functioning

# In[1]:


from wetterdienst import metadata_for_dwd_data, collect_dwd_data, get_nearest_station
from wetterdienst.enumerations.period_type_enumeration import PeriodType
from wetterdienst.enumerations.time_resolution_enumeration import TimeResolution
from wetterdienst.enumerations.parameter_enumeration import Parameter

import numpy as np
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import cm


# ## 1. First check the metadata to inform yourself of available stations
# (here we pick historical daily precipitation - hdp)

# In[2]:


# Fetch the station listing for historical daily precipitation ("hdp").
metadata_hdp = metadata_for_dwd_data(
    Parameter.PRECIPITATION_MORE,
    TimeResolution.DAILY,
    PeriodType.HISTORICAL,
)
# Summing the HAS_FILE column gives the number of stations reported there.
print("Number of stations with available data: ", metadata_hdp.HAS_FILE.sum())
print("Some of the stations:")
metadata_hdp.head()


# The metadata includes the station id, the range of the measurements, the position (including height), the place and state of the station, and whether a file is available for it. The following plot shows a map of those stations:

# In[3]:


# Colour the stations by height, binned at the quartiles of all station heights.
# plt.get_cmap is used instead of cm.get_cmap, which recent matplotlib releases
# no longer provide.
cmap = plt.get_cmap('viridis')
bounds = metadata_hdp.STATIONHEIGHT.quantile([0, 0.25, 0.5, 0.75, 1]).values
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)

# Scatter the stations by longitude/latitude on a single square figure.
fig, ax = plt.subplots(figsize=(10, 10))
plot = metadata_hdp.plot.scatter(
    x="LON", y="LAT", c="STATIONHEIGHT", cmap=cmap, norm=norm, ax=ax)
plot.set_title("Map of daily precipitation stations in Germany\n"
               "Color refers to height of station")
plt.show()


# ## 2. The usual way of retrieving data

# Usually there are three steps to follow:
# - select indexed files based on
#     - its station_id
#         - "1048" for Dresden, Germany
#     - its parameter
#         - "kl" for climate
#     - its time_resolution
#         - "daily" for daily data
#     - its period_type
#         - "historical" for data up to the end of the last year
# - download the resulting list of files
# - parse it into pandas.DataFrames
# 
# We have summarized those steps into one:
# - collect_dwd_data
# 
# Let's try it out for the above selection:

# In[4]:


print("Receiving historical daily climate data for Dresden-Klotzsche (1048)")
# A single call covers file selection, download and parsing for the
# requested station ids.
station_data = collect_dwd_data(
    [1048],
    Parameter.CLIMATE_SUMMARY,
    TimeResolution.DAILY,
    PeriodType.HISTORICAL,
)

station_data


# Note that DATE is already parsed, so we can easily create some nice graphs with matplotlib, which we will
# do in the next part.

# ## 3. Let's create some plots

# First, to make the data easier to handle, we transform it from wide to long format,
# which means that instead of having the values in several columns next to a date column,
# we want (besides the station id) three columns, where each row gives the date of the measurement,
# the element and the measured value. To make this clear, look at the following table, which is already transformed.

# In[5]:


# Identifier columns stay as they are; every other column is treated as a
# measured element and unpivoted into ELEMENT/VALUE pairs.
id_vars = ["STATION_ID", "DATE"]
value_vars = [
    column for column in station_data.columns if column not in id_vars
]
station_data_transformed = station_data.melt(
    id_vars=id_vars,
    value_vars=value_vars,
    var_name="ELEMENT",
    value_name="VALUE",
)

station_data_transformed.head()


# We can create a time series/histogram of some elements to compare the distribution of the values, here for example precipitation and mean temperature:

# In[6]:


# Keep only the elements of interest, sorted so that each element's rows
# are in chronological order per station.
elements_to_plot = ["RSK", "TMK"]
station_data_filtered = (
    station_data_transformed
    .loc[station_data_transformed["ELEMENT"].isin(elements_to_plot)]
    .sort_values(["STATION_ID", "ELEMENT", "DATE"])
)

# Select the columns with a list: the tuple form groupby(...)["DATE", "VALUE"]
# is deprecated and raises an error in current pandas releases.
station_data_grouped = station_data_filtered.groupby("ELEMENT")[["DATE", "VALUE"]]


# In[7]:


cmap = plt.get_cmap('viridis')
# One colour per element, spread evenly over the colormap.
colors = cmap(np.linspace(0, 1, len(elements_to_plot)))

# The grid always has two rows (time series on top, histogram below) and one
# column per element. squeeze=False keeps `axes` two-dimensional even for a
# single element, so the transposed iteration below always yields axis pairs.
fig, axes = plt.subplots(
    2, len(elements_to_plot), figsize=(10, 10), squeeze=False)

# axes.T iterates column-wise: each item is the (top, bottom) pair of one element.
for (element, frame), (series_ax, hist_ax), color in zip(
        station_data_grouped, axes.T, colors):
    frame.plot(x="DATE", y="VALUE", label=element, alpha=.75, ax=series_ax, c=color)
    frame.plot(y="VALUE", kind="hist", label=element, alpha=.75, ax=hist_ax)

# Leave room at the top of the figure for the overall title.
plt.tight_layout()
plt.subplots_adjust(top=0.9)
plt.suptitle("Precipitation/Mean temperature time series of Dresden, Germany")

plt.show()


# We can see here that the precipitation is strongly right-skewed (most values are close to zero, with a long tail towards high values) and not normally distributed, while the temperature is almost normally distributed! The time series also gives a glimpse of how much data is available. A sad thing to notice here is the gap during WW2.

# ## 4. Create yearly values

# In[8]:


# Aggregate each element per calendar year. The columns are selected with a
# list because the tuple form groupby(...)["DATE", "VALUE"] is deprecated and
# raises an error in current pandas releases.
for parameter, group in station_data_filtered.groupby("ELEMENT")[["DATE", "VALUE"]]:
    yearly_values = group.groupby(group["DATE"].dt.year)["VALUE"]
    # Precipitation (RSK) is summed up over the year; every other element
    # is averaged.
    if parameter == "RSK":
        print(yearly_values.sum())
    else:
        print(yearly_values.mean())


# ## 5. Find a station
# 
# We may want to find a station close to a certain location. To do so, simply call get_nearest_station

# In[9]:


# Search among the stations offering historical daily climate summaries.
# NOTE(review): the two lists are presumably latitudes and longitudes
# (one entry per query point) - confirm against the wetterdienst API.
get_nearest_station(
    [51.05089],
    [13.73832],
    Parameter.CLIMATE_SUMMARY,
    TimeResolution.DAILY,
    PeriodType.HISTORICAL,
)