Source code for gwgen.parseghcnrow

# -*- coding: utf-8 -*-
from gwgen._parseghcnrow import parseghcnrow
import six
import pandas as pd
import numpy as np
from itertools import chain
import datetime as dt
import re

if six.PY2:
    from itertools import imap as map, izip as zip

daymon_patt = re.compile(r'(?:\w|-){11}(\d{6})(?:TMAX|TMIN|PRCP)')


[docs]def read_ghcn_file(ifile): """Read in a GHCN station data file and convert it to a dataframe Parameters ---------- ifile: str The path to a ghcn datafile Returns ------- pandas.DataFrame The `ifile` converted to a dataframe""" # get number of days in the file with open(ifile) as f: ndays = np.sum(list(map(ndaymon, np.unique(list(map( lambda m: m.group(1), filter( None, map(daymon_patt.match, f.readlines())))))))) stationid, dates, variables, flags, j = parseghcnrow.parse_station( ifile, ndays or 100) dates = dates[:j] variables = variables[:j].astype(np.float64) flags = flags[:j].astype(np.str_) flags = np.core.defchararray.replace(flags, ' ', '') variables[np.isclose(variables, -9999.) | np.isclose(variables, -999.9)] = np.nan vlst = ['tmin', 'tmax', 'prcp'] df = pd.DataFrame.from_dict(dict(chain( [('id', np.repeat(np.array([stationid]).astype(np.str_), j))], zip(('year', 'month', 'day'), dates.T), zip(vlst, variables.T), chain(*[zip((var + '_m', var + '_q', var + '_s'), arr) for var, arr in zip(vlst, np.rollaxis(flags, 2, 1).T)])))) return df
[docs]def ndaymon(yearmon): """Calculate the number of days for one month in a year Parameters ---------- yearmon: str The first 4 numbers stand for the year, the others for the month (in datetime writing: ``'%Y%m'``)""" year = int(yearmon[:4]) month = int(yearmon[4:]) d = dt.date(year, month, 1) d2 = d.replace( year=year + 1 if month == 12 else year, month=1 if month == 12 else month + 1) return (d2 - d).days