In [1]:
import re
import collections
import when
import pytz
# Short alias for the `when.substitutions` attribute; from_string calls its in_string().
substitutions = when.substitutions
In [63]:
class ParsingError(ValueError):
    """Raised by from_string when the input string does not match the specifier."""
# The ABC aliases in ``collections`` were removed in Python 3.10; resolve the
# base class from ``collections.abc`` and fall back for Python 2.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:  # Python 2
    from collections import MutableMapping as _MutableMapping


class NotNoneDict(_MutableMapping):
    """Mapping whose existing entries cannot be overwritten with ``None``.

    Assigning ``None`` to a key that is already present is silently ignored,
    so a previously stored value (e.g. a caller-supplied default) survives.
    Assigning ``None`` to a *new* key stores it normally.
    """

    def __init__(self, defaults):
        """Start from a copy of ``defaults`` (a mapping or iterable of pairs)."""
        self._mapping = {}
        self._mapping.update(defaults)

    def __getitem__(self, item):
        return self._mapping[item]

    def __setitem__(self, item, value):
        # Keep the current value when asked to replace it with None.
        if value is None and item in self._mapping:
            return
        self._mapping[item] = value

    def __delitem__(self, item):
        del self._mapping[item]

    def __iter__(self):
        return iter(self._mapping)

    def __len__(self):
        return len(self._mapping)

    def __str__(self):
        return str(self._mapping)

    def __repr__(self):
        return repr(self._mapping)
def _scrub_potentials(*potentials):
potentials = [potential for potential in potentials if potential is not None]
if len(potentials) == 0:
return None
first = potentials[0]
if not all(first == potential for potential in potentials):
raise pytz.AmbiguousTimeError()
return first
def process_year(year_from_1776, decade_from_76, century):
    """Resolve the year from the four-digit and/or two-digit captures.

    Args:
        year_from_1776: text captured for the ``1776`` token, or None.
        decade_from_76: text captured for the ``76`` token, or None.
        century: integer added to the two-digit value to obtain a full year
            (presumably a base such as 1900 or 2000 -- confirm with callers),
            or None.

    Returns:
        The year as an int, or None when neither token was captured.

    Raises:
        ValueError: a two-digit year was captured but ``century`` is None.
        pytz.AmbiguousTimeError: the four-digit year is more than 100 away
            from ``century``, or the two captures give different years.
    """
    year_from_76 = None
    if decade_from_76 is not None:
        if century is None:
            # Was ``pytz.ValueError()``, which does not exist and surfaced as
            # an AttributeError; the builtin is what was intended.
            raise ValueError('a two-digit year requires a century')
        year_from_76 = century + int(decade_from_76)

    if year_from_1776 is not None:
        year_from_1776 = int(year_from_1776)
        if century is not None and abs(year_from_1776 - century) > 100:
            raise pytz.AmbiguousTimeError()

    # return year, catching any ambiguities
    return _scrub_potentials(year_from_1776, year_from_76)
def process_month(month_from_July, month_from_Jul, month_from_07, month_from_7):
    """Resolve the month number from name and/or numeric captures.

    Args:
        month_from_July: full English month name, or None.
        month_from_Jul: three-letter English month abbreviation, or None.
        month_from_07: numeric month text for the ``07`` token, or None.
        month_from_7: numeric month text for the ``7`` token, or None.

    Returns:
        The month as an int, or None when nothing was captured. Ambiguity
        between the captures is handled by ``_scrub_potentials``.
    """
    full_names = (
        'January', 'February', 'March', 'April', 'May', 'June', 'July',
        'August', 'September', 'October', 'November', 'December',
    )
    # Each abbreviation is the first three letters of the full name.
    month_to_int = {}
    for number, name in enumerate(full_names, 1):
        month_to_int[name] = number
        month_to_int[name[:3]] = number
    month_to_int[None] = None  # lets an absent capture pass straight through

    candidates = [
        month_to_int[month_from_July],
        month_to_int[month_from_Jul],
        None if month_from_07 is None else int(month_from_07),
        None if month_from_7 is None else int(month_from_7),
    ]
    # return month, catching any ambiguities
    return _scrub_potentials(*candidates)
def process_day(day_from_04, day_from_4):
    """Resolve the day of month from the ``04`` and/or ``4`` captures.

    Each argument is the captured text or None. Returns the day as an int,
    or None when neither was captured; disagreement between the two is
    handled by ``_scrub_potentials``.
    """
    padded = None if day_from_04 is None else int(day_from_04)
    bare = None if day_from_4 is None else int(day_from_4)
    return _scrub_potentials(padded, bare)
def process_hour(hour_from_13, hour_from_01, hour_from_1, meridian_from_pm,
                 meridian_from_p_m_, meridian_from_PM, meridian_from_P_M_):
    """Resolve the hour (0-23) from 24-hour and/or 12-hour captures.

    Args:
        hour_from_13: text captured for the ``13`` token (used as-is), or None.
        hour_from_01: text captured for the ``01`` token (combined with the
            meridian marker), or None.
        hour_from_1: text captured for the ``1`` token (combined with the
            meridian marker), or None.
        meridian_from_pm: ``'am'``/``'pm'`` capture, or None.
        meridian_from_p_m_: ``'a.m.'``/``'p.m.'`` capture, or None.
        meridian_from_PM: ``'AM'``/``'PM'`` capture, or None.
        meridian_from_P_M_: ``'A.M.'``/``'P.M.'`` capture, or None.

    Returns:
        The hour as an int, or None when no hour was captured. Disagreement
        between captures is handled by ``_scrub_potentials``.
    """
    # Each marker style maps to an offset: 12 for its "pm" spelling, else 0.
    marker_styles = (
        (meridian_from_pm, 'pm'),
        (meridian_from_p_m_, 'p.m.'),
        (meridian_from_PM, 'PM'),
        (meridian_from_P_M_, 'P.M.'),
    )
    offsets = [
        None if marker is None else (12 if marker == pm_spelling else 0)
        for marker, pm_spelling in marker_styles
    ]
    meridian_offset = _scrub_potentials(*offsets)

    def to_24_hour(text):
        """Convert a 12-hour capture using the resolved meridian offset."""
        if text is None:
            return None
        hour = int(text)
        if meridian_offset is None:
            # No am/pm marker was captured: take the hour literally instead
            # of failing on ``None + int``.
            return hour
        # 12 a.m. is hour 0 and 12 p.m. is hour 12, hence the modulo.
        return hour % 12 + meridian_offset

    if hour_from_13 is not None:
        hour_from_13 = int(hour_from_13)

    # return hour, catching any ambiguities
    return _scrub_potentials(hour_from_13, to_24_hour(hour_from_01),
                             to_24_hour(hour_from_1))
def process_minute(minute_from_02, minute_from_2):
    """Resolve the minute from the ``02`` and/or ``2`` captures.

    Each argument is the captured text or None. Returns the minute as an
    int, or None when neither was captured; disagreement between the two is
    handled by ``_scrub_potentials``.
    """
    parsed = [
        int(text) if text is not None else None
        for text in (minute_from_02, minute_from_2)
    ]
    return _scrub_potentials(*parsed)
def process_second(second_from_03, second_from_3):
    """Resolve the second from the ``03`` and/or ``3`` captures.

    Each argument is the captured text or None. Returns the second as an
    int, or None when neither was captured; disagreement between the two is
    handled by ``_scrub_potentials``.
    """
    def as_int(text):
        return None if text is None else int(text)

    return _scrub_potentials(as_int(second_from_03), as_int(second_from_3))
def process_fractional_sections(millisecond_from_012, millisecond_from_12, microsecond_from_12345,
                                microsecond_from_012345):
    """Resolve the sub-second part, in microseconds, from the fractional captures.

    Args:
        millisecond_from_012: text captured for the ``012`` token, or None.
        millisecond_from_12: text captured for the ``12`` token, or None.
        microsecond_from_12345: text captured for the ``12345`` token, or None.
        microsecond_from_012345: text captured for the ``012345`` token, or None.

    Returns:
        Millisecond captures multiplied by 1000, microsecond captures as-is;
        None when nothing was captured. Disagreement between captures is
        handled by ``_scrub_potentials``.
    """
    def scaled(text, factor):
        return None if text is None else factor * int(text)

    from_012 = scaled(millisecond_from_012, 1000)
    from_12 = scaled(millisecond_from_12, 1000)
    from_12345 = scaled(microsecond_from_12345, 1)
    from_012345 = scaled(microsecond_from_012345, 1)
    # return microseconds, catching any ambiguities
    return _scrub_potentials(from_012, from_12, from_12345, from_012345)
def process_timezone(timezone_from_America_New_York):
    """Normalise the captured timezone text.

    A bare ``'z'`` or ``'Z'`` becomes ``'utc'``; any other value (including
    None) is returned unchanged.
    """
    is_zulu = timezone_from_America_New_York in ('z', 'Z')
    return 'utc' if is_zulu else timezone_from_America_New_York
def from_string(string, specifier, century=None, year=None, month=None,
                day=None, hour=0, minute=0, second=0, millisecond=0,
                microsecond=0, meridian=None, timezone=None,
                dst_if_ambiguous=None):
    """Construct a When by parsing ``string`` according to ``specifier``.

    The specifier is written with reference tokens (``1776``, ``07``, ``04``,
    ``1``, ``02``, ``03``, ``012345``, ``America/New_York``, ``p.m.`` ...).
    Each token is replaced by a named regex group via
    ``substitutions.in_string`` and the resulting pattern is applied to the
    start of ``string`` with ``re.match``.

    Args:
        string: the text to parse.
        specifier: the layout, written with the reference tokens.
        century: passed to ``process_year`` to complete a two-digit year.
        year, month, day, hour, minute, second, microsecond, timezone,
        dst_if_ambiguous: defaults handed to ``when.When``; a field keeps
            its default whenever the string yields no value for it.
        millisecond: must agree with ``microsecond`` (``1000 * millisecond``)
            unless ``microsecond`` is None, in which case it is converted.
        meridian: accepted but not used by this function.

    Returns:
        ``when.When(**fields)`` built from the resolved fields.

    Raises:
        ParsingError: ``string`` does not match the pattern.
        pytz.AmbiguousTimeError: ``millisecond`` and ``microsecond``
            disagree. Errors raised by the ``process_*`` helpers propagate.
    """
    # pre-processing: reconcile the two sub-second defaults
    if millisecond is not None and microsecond is None:
        microsecond = 1000*millisecond
    elif millisecond is not None and microsecond is not None:
        if 1000*millisecond != microsecond:
            raise pytz.AmbiguousTimeError()

    # first pass of substitutions on specifier to prepare regex
    # NOTE(review): the entries are listed longest-token-first; presumably
    # substitutions.in_string depends on that ordering -- confirm.
    substitutions_for_regex = {
        '1776': r'(?P<_1776>\d?\d?\d?\d)',
        '76': r'(?P<_76>\d\d)',
        'July': r'(?P<_July>January|February|March|April|May|June|July|August|September|October|November|December)',
        'Jul': r'(?P<_Jul>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)',
        'America/New_York': r'(?P<timezone>Z|z|[a-zA-Z_/]+)',
        '012345': r'(?P<_012345>\d\d\d\d\d\d)',
        '12345': r'(?P<_12345>\d?\d?\d?\d?\d?\d)',
        '012': r'(?P<_012>\d\d\d)',
        '12': r'(?P<_12>\d?\d?\d)',
        '13': r'(?P<_13>\d\d)',
        '07': r'(?P<_07>\d\d)',
        '04': r'(?P<_04>\d\d)',
        '03': r'(?P<_03>\d\d)',
        '02': r'(?P<_02>\d\d)',
        '01': r'(?P<_01>\d\d)',
        '7': r'(?P<_7>\d?\d)',
        '4': r'(?P<_4>\d?\d)',
        '3': r'(?P<_3>\d?\d)',
        '2': r'(?P<_2>\d?\d)',
        '1': r'(?P<_1>\d?\d)',
        'pm': r'(?P<_pm>am|pm)',
        'p.m.': r'(?P<_p_m_>a\.m\.|p\.m\.)',
        'PM': r'(?P<_PM>AM|PM)',
        'P.M.': r'(?P<_P_M_>A\.M\.|P\.M\.)',
    }

    # Caller-supplied defaults; NotNoneDict ignores a later assignment of
    # None to these keys, so an unparsed field keeps its default.
    matched = NotNoneDict({
        'year': year,
        'month': month,
        'day': day,
        'hour': hour,
        'minute': minute,
        'second': second,
        'microsecond': microsecond,
        'timezone': timezone,
        'dst_if_ambiguous': dst_if_ambiguous,
    })

    regex = substitutions.in_string(specifier, substitutions_for_regex)
    match = re.match(regex, string)
    if match is None:
        raise ParsingError(
            '%r does not match specifier %r' % (string, specifier))
    # Groups absent from the pattern are simply missing from the dict, so
    # every lookup below goes through .get() and yields None for them.
    captured = match.groupdict().get

    # year
    matched['year'] = process_year(captured('_1776'), captured('_76'), century)

    # month
    matched['month'] = process_month(captured('_July'), captured('_Jul'),
                                     captured('_07'), captured('_7'))

    # day
    day_from_04 = captured('_04')
    day_from_4 = captured('_4')
    matched['day'] = process_day(day_from_04, day_from_4)

    # hour (24-hour capture plus 12-hour captures with their meridian marker)
    matched['hour'] = process_hour(captured('_13'), captured('_01'), captured('_1'),
                                   captured('_pm'), captured('_p_m_'),
                                   captured('_PM'), captured('_P_M_'))

    # minute
    matched['minute'] = process_minute(captured('_02'), captured('_2'))

    # second
    matched['second'] = process_second(captured('_03'), captured('_3'))

    # millisecond and microsecond
    matched['microsecond'] = process_fractional_sections(
        captured('_012'), captured('_12'),
        captured('_12345'), captured('_012345'))

    # timezone
    matched['timezone'] = process_timezone(captured('timezone'))

    return when.When(**matched)
In [66]:
# Example call: a timestamp with a 12-hour clock field, six fractional digits,
# a 'z' zone suffix and a trailing 'a.m.' marker, parsed with the matching specifier.
from_string('2015-03-03 2:00:59.222222z a.m.', '1776-07-04 1:02:03.012345America/New_York p.m.', timezone='utc')
Out[66]:
In [ ]:
In [ ]: