In [1]:
import re
import collections
import when
import pytz

# Shorthand for the ``substitutions`` attribute of ``when``; from_string() calls
# substitutions.in_string() to turn a specifier into a regular expression.
substitutions = when.substitutions

In [63]:
class ParsingError(ValueError):
    """Raised by from_string() when the string does not match the specifier."""


# The ABC aliases were removed from the top-level ``collections`` namespace in
# Python 3.10; fall back to the old location only where ``collections.abc``
# does not exist (Python 2).
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping


class NotNoneDict(_MutableMapping):
    """Mapping whose existing entries cannot be overwritten with ``None``.

    The mapping is seeded with ``defaults``.  Assigning ``None`` to a key that
    is already present is silently ignored, so the current value survives;
    assigning ``None`` to a new key stores ``None`` as usual.  Every other
    mapping operation behaves like a plain dict.
    """

    def __init__(self, defaults):
        """Copy the ``defaults`` mapping (or iterable of pairs) as initial content."""
        self._mapping = {}
        self._mapping.update(defaults)

    def __getitem__(self, item):
        return self._mapping[item]

    def __setitem__(self, item, value):
        # A None must never clobber a value that is already stored.
        if value is None and item in self._mapping:
            return
        self._mapping[item] = value

    def __delitem__(self, item):
        del self._mapping[item]

    def __iter__(self):
        return iter(self._mapping)

    def __len__(self):
        return len(self._mapping)

    def __str__(self):
        return str(self._mapping)

    def __repr__(self):
        return repr(self._mapping)


def _scrub_potentials(*potentials):
    potentials = [potential for potential in potentials if potential is not None]
    if len(potentials) == 0:
        return None
    first = potentials[0]
    if not all(first == potential for potential in potentials):
        raise pytz.AmbiguousTimeError()
    return first


def process_year(year_from_1776, decade_from_76, century):
    """Resolve the year from a full-year match and/or a two-digit-year match.

    Args:
        year_from_1776: matched text of a full year, or None.
        decade_from_76: matched text of a two-digit year, or None.  It is
            added to ``century`` (e.g. century 1700 and '76' give 1776).
        century: numeric base for the two-digit year, or None.

    Returns:
        The year as an int, or None when neither form was matched.

    Raises:
        ValueError: a two-digit year was matched but no century was given.
        pytz.AmbiguousTimeError: the full year is more than 100 away from
            ``century``, or the two forms give different years.
    """
    # pre-process
    if decade_from_76 is not None:
        if century is None:
            # pytz has no ValueError attribute; the built-in one is what is meant.
            raise ValueError('a two-digit year requires a century')
        year_from_76 = century + int(decade_from_76)
    else:
        year_from_76 = None
    if year_from_1776 is not None:
        year_from_1776 = int(year_from_1776)
        if century is not None and abs(year_from_1776 - century) > 100:
            raise pytz.AmbiguousTimeError()
    # return year, catching any ambiguities
    return _scrub_potentials(year_from_1776, year_from_76)


def process_month(month_from_July, month_from_Jul, month_from_07, month_from_7):
    """Resolve the month number from up to four matched representations.

    Args:
        month_from_July: full English month name, or None.
        month_from_Jul: three-letter English month abbreviation, or None.
        month_from_07: month number as matched text, or None.
        month_from_7: month number as matched text, or None.

    Returns:
        The month as an int, or None when nothing was matched.

    Raises:
        KeyError: a month name or abbreviation is not in the lookup table.
        pytz.AmbiguousTimeError: the representations disagree.
    """
    name_pairs = (
        ('January', 'Jan'),
        ('February', 'Feb'),
        ('March', 'Mar'),
        ('April', 'Apr'),
        ('May', 'May'),
        ('June', 'Jun'),
        ('July', 'Jul'),
        ('August', 'Aug'),
        ('September', 'Sep'),
        ('October', 'Oct'),
        ('November', 'Nov'),
        ('December', 'Dec'),
    )
    # None maps to None so that unmatched groups pass straight through.
    number_by_name = {None: None}
    for number, (full_name, short_name) in enumerate(name_pairs, 1):
        number_by_name[full_name] = number
        number_by_name[short_name] = number
    from_full_name = number_by_name[month_from_July]
    from_short_name = number_by_name[month_from_Jul]
    from_padded = None if month_from_07 is None else int(month_from_07)
    from_bare = None if month_from_7 is None else int(month_from_7)
    # any disagreement between the candidates is reported by the helper
    return _scrub_potentials(from_full_name, from_short_name, from_padded, from_bare)
    

def process_day(day_from_04, day_from_4):
    """Resolve the day of the month from its two possible matched forms.

    Each argument is the matched text or None.  Returns the day as an int, or
    None when neither form was matched; disagreement between the two forms is
    reported by _scrub_potentials.
    """
    from_padded = None if day_from_04 is None else int(day_from_04)
    from_bare = None if day_from_4 is None else int(day_from_4)
    return _scrub_potentials(from_padded, from_bare)


def process_hour(hour_from_13, hour_from_01, hour_from_1, meridian_from_pm,
                 meridian_from_p_m_, meridian_from_PM, meridian_from_P_M_):
    """Resolve the hour (0-23) from the matched hour and meridian texts.

    Args:
        hour_from_13: hour used as-is (no meridian adjustment), or None.
        hour_from_01: hour combined with the meridian, or None.
        hour_from_1: hour combined with the meridian, or None.
        meridian_from_pm: 'am' / 'pm', or None.
        meridian_from_p_m_: 'a.m.' / 'p.m.', or None.
        meridian_from_PM: 'AM' / 'PM', or None.
        meridian_from_P_M_: 'A.M.' / 'P.M.', or None.

    Returns:
        The hour as an int, or None when no hour was matched.

    Raises:
        pytz.AmbiguousTimeError: the meridian markers, or the resulting
            hours, disagree with each other.
    """
    # Each marker contributes 12 hours when it is the post-meridian spelling
    # of its style and 0 otherwise; unmatched markers stay None.
    marker_styles = (
        (meridian_from_pm, 'pm'),
        (meridian_from_p_m_, 'p.m.'),
        (meridian_from_PM, 'PM'),
        (meridian_from_P_M_, 'P.M.'),
    )
    offsets = [None if marker is None else (12 if marker == post_meridian else 0)
               for marker, post_meridian in marker_styles]
    meridian_offset = _scrub_potentials(*offsets)

    def to_24_hour(text):
        hour = int(text)
        if meridian_offset is None:
            # No am/pm marker was matched: take the hour as written instead
            # of failing on ``None + int``.
            return hour
        if hour == 12:
            # On a 12-hour clock 12 am is 00:00 and 12 pm is 12:00.
            hour = 0
        return meridian_offset + hour

    if hour_from_13 is not None:
        hour_from_13 = int(hour_from_13)
    if hour_from_01 is not None:
        hour_from_01 = to_24_hour(hour_from_01)
    if hour_from_1 is not None:
        hour_from_1 = to_24_hour(hour_from_1)
    # return hour, catching any ambiguities
    return _scrub_potentials(hour_from_13, hour_from_01, hour_from_1)


def process_minute(minute_from_02, minute_from_2):
    """Resolve the minute from its two possible matched forms.

    Each argument is the matched text or None.  Returns the minute as an int,
    or None when neither form was matched; disagreement between the two forms
    is reported by _scrub_potentials.
    """
    from_padded = int(minute_from_02) if minute_from_02 is not None else None
    from_bare = int(minute_from_2) if minute_from_2 is not None else None
    return _scrub_potentials(from_padded, from_bare)


def process_second(second_from_03, second_from_3):
    """Resolve the second from its two possible matched forms.

    Each argument is the matched text or None.  Returns the second as an int,
    or None when neither form was matched; disagreement between the two forms
    is reported by _scrub_potentials.
    """
    from_padded = int(second_from_03) if second_from_03 is not None else None
    from_bare = int(second_from_3) if second_from_3 is not None else None
    return _scrub_potentials(from_padded, from_bare)


def process_fractional_sections(millisecond_from_012, millisecond_from_12, microsecond_from_12345,
                                microsecond_from_012345):
    """Resolve the sub-second part, expressed in microseconds.

    Millisecond texts are converted to int and multiplied by 1000;
    microsecond texts are converted to int unchanged.  Unmatched (None)
    arguments are ignored.

    Returns:
        The microsecond count as an int, or None when nothing was matched.
        Disagreement between candidates is reported by _scrub_potentials.
    """
    ms_padded = None if millisecond_from_012 is None else 1000*int(millisecond_from_012)
    ms_bare = None if millisecond_from_12 is None else 1000*int(millisecond_from_12)
    us_bare = None if microsecond_from_12345 is None else int(microsecond_from_12345)
    us_padded = None if microsecond_from_012345 is None else int(microsecond_from_012345)
    # all candidates are in microseconds at this point
    return _scrub_potentials(ms_padded, ms_bare, us_bare, us_padded)


def process_timezone(timezone_from_America_New_York):
    """Normalise the matched timezone text.

    'z' and 'Z' are returned as 'utc'; any other value, including None, is
    returned unchanged.
    """
    is_zulu = timezone_from_America_New_York in ('z', 'Z')
    return 'utc' if is_zulu else timezone_from_America_New_York


def from_string(string, specifier, century=None, year=None, month=None,
                day=None, hour=0, minute=0, second=0, millisecond=0,
                microsecond=0, meridian=None, timezone=None,
                dst_if_ambiguous=None):
    """Construct a When by parsing ``string`` according to ``specifier``.

    The specifier describes the layout using reference tokens ('1776', '76',
    'July', 'Jul', '07', '7', '04', '4', '13', '01', '1', '02', '2', '03',
    '3', '012', '12', '012345', '12345', 'pm', 'p.m.', 'PM', 'P.M.',
    'America/New_York').  substitutions.in_string() is given the specifier and
    a token -> named-regex-group table; its result is applied to the start of
    ``string`` with re.match (presumably each token is replaced by its group;
    see when.substitutions).

    Args:
        string: the text to parse.
        specifier: the layout, written with the reference tokens above.
        century: numeric base added to a two-digit year ('76').
        year, month, day, hour, minute, second, microsecond, timezone,
        dst_if_ambiguous: defaults passed to When for any field that the
            string does not provide; a value found in the string wins.
        millisecond: alternative way to give the default microsecond
            (multiplied by 1000).
        meridian: accepted for interface compatibility; currently unused.

    Returns:
        when.When built from the parsed and default fields.  No check is made
        here that year, month, day and timezone ended up non-None.

    Raises:
        ParsingError: ``string`` does not match the specifier.
        pytz.AmbiguousTimeError: ``millisecond`` and ``microsecond`` are both
            given and disagree, or matched fields contradict each other.
        Errors raised by the process_* helpers propagate unchanged.
    """
    # pre-processing: merge the millisecond default into microsecond.  A falsy
    # value (None or the default 0) counts as "not supplied", so a lone
    # millisecond= or microsecond= keyword is not rejected as conflicting with
    # the other one's default of 0.
    if millisecond is not None:
        microsecond_from_millisecond = 1000*millisecond
        if not microsecond:
            microsecond = microsecond_from_millisecond
        elif microsecond_from_millisecond and microsecond_from_millisecond != microsecond:
            raise pytz.AmbiguousTimeError()
    # token -> named regex group, used to turn the specifier into a regex
    substitutions_for_regex = {
        '1776': r'(?P<_1776>\d?\d?\d?\d)',
        '76': r'(?P<_76>\d\d)',
        'July': r'(?P<_July>January|February|March|April|May|June|July|August|September|October|November|December)',
        'Jul': r'(?P<_Jul>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)',
        'America/New_York': r'(?P<timezone>Z|z|[a-zA-Z_/]+)',
        '012345': r'(?P<_012345>\d\d\d\d\d\d)',
        '12345': r'(?P<_12345>\d?\d?\d?\d?\d?\d)',
        '012': r'(?P<_012>\d\d\d)',
        '12': r'(?P<_12>\d?\d?\d)',
        '13': r'(?P<_13>\d\d)',
        '07': r'(?P<_07>\d\d)',
        '04': r'(?P<_04>\d\d)',
        '03': r'(?P<_03>\d\d)',
        '02': r'(?P<_02>\d\d)',
        '01': r'(?P<_01>\d\d)',
        '7': r'(?P<_7>\d?\d)',
        '4': r'(?P<_4>\d?\d)',
        '3': r'(?P<_3>\d?\d)',
        '2': r'(?P<_2>\d?\d)',
        '1': r'(?P<_1>\d?\d)',
        'pm': r'(?P<_pm>am|pm)',
        'p.m.': r'(?P<_p_m_>a\.m\.|p\.m\.)',
        'PM': r'(?P<_PM>AM|PM)',
        'P.M.': r'(?P<_P_M_>A\.M\.|P\.M\.)',
    }
    # Start from the caller's defaults; NotNoneDict ignores a None assigned to
    # an existing key, so a field missing from the string keeps its default.
    matched = NotNoneDict({
        'year': year,
        'month': month,
        'day': day,
        'hour': hour,
        'minute': minute,
        'second': second,
        'microsecond': microsecond,
        'timezone': timezone,
        'dst_if_ambiguous': dst_if_ambiguous,
    })
    regex = substitutions.in_string(specifier, substitutions_for_regex)
    match = re.match(regex, string)
    if match is None:
        raise ParsingError('%r does not match specifier %r' % (string, specifier))
    # groups absent from the specifier are simply missing -> None via .get()
    groups = match.groupdict()
    # year
    matched['year'] = process_year(groups.get('_1776'), groups.get('_76'), century)
    # month
    matched['month'] = process_month(groups.get('_July'), groups.get('_Jul'),
                                     groups.get('_07'), groups.get('_7'))
    # day
    day_from_04 = groups.get('_04')
    day_from_4 = groups.get('_4')
    matched['day'] = process_day(day_from_04, day_from_4)
    # hour
    matched['hour'] = process_hour(groups.get('_13'), groups.get('_01'), groups.get('_1'),
                                   groups.get('_pm'), groups.get('_p_m_'),
                                   groups.get('_PM'), groups.get('_P_M_'))
    # minute
    matched['minute'] = process_minute(groups.get('_02'), groups.get('_2'))
    # second
    matched['second'] = process_second(groups.get('_03'), groups.get('_3'))
    # millisecond and microsecond
    matched['microsecond'] = process_fractional_sections(groups.get('_012'), groups.get('_12'),
                                                         groups.get('_12345'), groups.get('_012345'))
    # timezone
    matched['timezone'] = process_timezone(groups.get('timezone'))
    return when.When(**matched)

In [66]:
# Example: the second argument spells out the layout with the reference tokens
# ('1776', '07', '04', ...); timezone='utc' is only a fallback default that a
# timezone found in the string replaces.
from_string('2015-03-03 2:00:59.222222z a.m.', '1776-07-04 1:02:03.012345America/New_York p.m.', timezone='utc')


Out[66]:
When(2015, 3, 3, 2, 0, 59, 222222, 'utc', False)

In [ ]:


In [ ]: