Source code for ternip.rule_engine.normalisation_functions.relative_date_functions

#!/usr/bin/env python

import datetime
import calendar

import re

from ternip.rule_engine.normalisation_functions import string_conversions
from ternip.rule_engine.normalisation_functions import date_functions
from ternip.rule_engine import expressions


def _undated_reference(v, offset):
    """
    Fallback used when no concrete date can be computed: only the sign of the
    offset is reported. Returns 'FUTURE_REF' for offsets of 1 or more,
    'PAST_REF' for offsets of -1 or less, and v unchanged otherwise.
    """
    if offset >= 1:
        return 'FUTURE_REF'
    elif offset <= -1:
        return 'PAST_REF'
    else:
        return v


def offset_from_date(v, offset, gran='D', exact=False):
    """
    Given a date string and some numeric offset, as well as a unit, compute
    the date which lies offset units of gran away from v.

    v      -- reference date string laid out as YYYYMM or YYYYMMDD, optionally
              followed by one separator character (index 8 is skipped, e.g.
              the 'T' in '20100615T1030') and then HH, HHMM or HHMMSS. When
              no day is present the first of the month is assumed.
    offset -- signed number of units to move by.
    gran   -- unit of the offset (defaults to 'D'). It is first passed through
              string_conversions.units_to_gran(); the result is then matched
              against 'TM' (minutes), 'TH' (hours), 'D' (days), 'W' (weeks),
              'F' (fortnights), 'M' (months), 'Y' (years), 'E' (decades) and
              'C' (centuries).
    exact  -- if true, the exact date is figured out; otherwise the result is
              truncated to the level of granularity given by gran.

    Returns a date string. If v is empty, or the granularity is none of the
    codes listed above, then 'FUTURE_REF', 'PAST_REF' or v itself is returned
    depending on the sign of offset. A non-empty v whose year/month/day/time
    fields are not numeric raises ValueError (from int()).
    """

    gran = string_conversions.units_to_gran(gran)

    # check for valid refdate
    if not v:
        return _undated_reference(v, offset)

    # Extract date components into a datetime object for manipulation
    y = int(v[:4])
    m = int(v[4:6])
    really_d = len(v) >= 8
    d = int(v[6:8]) if really_d else 1
    h = int(v[9:11]) if len(v) >= 11 else None
    minute = int(v[11:13]) if len(v) >= 13 else None
    second = int(v[13:15]) if len(v) >= 15 else None
    # Missing time fields default to zero, exactly as datetime itself does
    dt = datetime.datetime(y, m, d, h or 0, minute or 0, second or 0)

    # Do manipulations
    if gran == 'TM':
        # minutes
        dt += datetime.timedelta(minutes=offset)
        return dt.strftime('%Y%m%dT%H%M')

    elif gran == 'TH':
        # hours
        dt += datetime.timedelta(hours=offset)
        if exact:
            return dt.strftime('%Y%m%dT%H%M')
        else:
            return dt.strftime('%Y%m%dT%H')

    elif gran == 'D':
        # days
        dt += datetime.timedelta(days=offset)
        if exact and minute is not None:
            return dt.strftime('%Y%m%dT%H%M')
        elif exact and h is not None:
            return dt.strftime('%Y%m%dT%H')
        else:
            return dt.strftime('%Y%m%d')

    elif gran == 'W' or gran == 'F':
        # weeks/fortnights
        if gran == 'F':
            offset *= 2
        dt += datetime.timedelta(weeks=offset)
        if exact:
            return dt.strftime('%Y%m%d')
        else:
            # %W is strftime's Monday-based week-of-year number
            return dt.strftime('%YW%W')

    elif gran == 'M':
        # months - timedelta rather annoyingly doesn't support months, so
        # count months linearly from year 0 and split the total back into a
        # year and a month. divmod floors, so offsets which cross a year
        # boundary backwards land in the right year (int(m / 12) truncates
        # towards zero under true division and did not).
        y, m = divmod(y * 12 + (m - 1) + offset, 12)
        m += 1

        # avoid bad days (e.g. the 31st of a 30 day month)
        d = min(d, calendar.monthrange(y, m)[1])

        if exact:
            return "%04d%02d%02d" % (y, m, d)
        else:
            return "%04d%02d" % (y, m)

    elif gran == 'Y' or gran == 'E' or gran == 'C':
        # years/decades/centuries - again, need to do a bit more work
        if gran == 'C':
            offset *= 100
        if gran == 'E':
            offset *= 10
        y += offset

        # Format the numeric components directly rather than going through a
        # datetime object, so the result does not depend on which years
        # datetime/strftime are able to represent
        if not exact:
            if gran == 'C':
                return ("{0:04d}".format(y))[:2]
            elif gran == 'E':
                return ("{0:04d}".format(y))[:3]
            else:
                return "%04d" % y
        else:
            if d == 29 and m == 2 and not calendar.isleap(y):
                # eugh, mucking about with a date that's not going to be in the
                # target year - fall back
                d = 28
            if really_d:
                return "%04d%02d%02d" % (y, m, d)
            else:
                return "%04d%02d" % (y, m)

    # Unrecognised granularity
    return _undated_reference(v, offset)
def compute_offset_base(ref_date, expression, current_direction):
    """
    Work out the base date, as a date string, from which a relative offset
    should be computed.

    ref_date          -- reference date string (YYYYMMDD...).
    expression        -- a simple anchoring expression: a partial date of the
                         form XXXXMMDD, a day of the week, a month name, a
                         holiday, 'yesterday' or 'tomorrow'. May be None.
    current_direction -- direction of the relative expression: greater than
                         zero looks forward, less than zero looks back.

    If expression is None, or nothing in it is recognised, ref_date is
    returned unchanged.
    """

    def anchor_year(month, day):
        # Returns the reference date moved into the year the month/day should
        # be read in: next year if that date has already gone by and we are
        # looking forward, last year if it is still to come and we are
        # looking back, otherwise the reference date as it stands.
        ref_month = int(ref_date[4:6])
        ref_day = int(ref_date[6:8])
        gone_by = month < ref_month or (month == ref_month and day < ref_day)
        to_come = month > ref_month or (month == ref_month and day > ref_day)
        if gone_by and current_direction > 0:
            return offset_from_date(ref_date, 1, 'Y', True)
        if to_come and current_direction < 0:
            return offset_from_date(ref_date, -1, 'Y', True)
        return ref_date

    # Nothing to anchor on
    if expression is None:
        return ref_date

    # Partial date (month and day known, year not)
    found = re.search(r'^XXXX(\d\d)(\d\d)', expression, re.I)
    if found is not None:
        base = anchor_year(int(found.group(1)), int(found.group(2)))
        return base[:4] + expression[4:]

    # Day of the week
    found = re.search(expressions.DAYS, expression, re.I)
    if found is not None:
        wanted_dow = string_conversions.day_to_num(found.group())
        ref_dow = date_functions.date_to_dow(int(ref_date[:4]), int(ref_date[4:6]), int(ref_date[6:8]))
        delta = wanted_dow - ref_dow
        if delta >= 0 and current_direction < 0:
            delta -= 7
        elif delta <= 0 and current_direction > 0:
            delta += 7
        return offset_from_date(ref_date, delta)

    # Month name
    found = re.search('(' + expressions.MONTH_ABBRS + '|' + expressions.MONTHS + ')', expression, re.I)
    if found is not None:
        delta = date_functions.month_to_num(found.group()) - int(ref_date[4:6])
        if delta >= 0 and current_direction < 0:
            delta -= 12
        elif delta <= 0 and current_direction > 0:
            delta += 12
        return offset_from_date(ref_date, delta, 'M')

    # Holiday which falls on the same month and day every year
    found = re.search(expressions.FIXED_HOLIDAYS, expression, re.I)
    if found is not None:
        holdate = string_conversions.fixed_holiday_date(found.group())
        base = anchor_year(int(holdate[:2]), int(holdate[2:4]))
        return base[:4] + holdate

    # Holiday defined as the nth day-of-week of a month
    found = re.search(expressions.NTH_DOW_HOLIDAYS, expression, re.I)
    if found is not None:
        rule = string_conversions.nth_dow_holiday_date(found.group(1))
        # See whether this year's occurrence has passed or not...
        this_years_day = date_functions.nth_dow_to_day(rule, int(ref_date[:4]))
        base = anchor_year(rule[0], this_years_day)
        # ...then work out the actual day in the year that was settled on
        year = base[:4]
        return "%s%02d%02d" % (year, rule[0], date_functions.nth_dow_to_day(rule, int(year)))

    # Holiday positioned relative to Easter
    found = re.search(expressions.LUNAR_HOLIDAYS, expression, re.I)
    if found is not None:
        # Strip any <token~...> wrapping and whitespace to get a lookup key
        hol = re.sub(r'<([^~]*)[^>]*>', r'\1', found.group())
        hol = re.sub(r'\s', '', hol).lower()
        easter_offsets = {
            'goodfriday': -3,
            'shrovetuesday': -47,
            'ashwednesday': -46,
            'palmsunday': -7,
            'easter': 0
        }

        def holiday_in(year):
            return offset_from_date(date_functions.easter_date(year), easter_offsets[hol])

        # See whether this year's occurrence has passed or not, then
        # recompute the holiday for whichever year was settled on
        this_years = holiday_in(int(ref_date[:4]))
        base = anchor_year(int(this_years[4:6]), int(this_years[6:8]))
        return holiday_in(int(base[:4]))

    # Other expressions
    lowered = expression.lower()
    if 'yesterday' in lowered:
        return offset_from_date(ref_date, -1)
    if 'tomorrow' in lowered:
        return offset_from_date(ref_date, 1)

    # Couldn't figure out an offset
    return ref_date
def _extract_verbs(s): """ Given a sentence, extract the verbs and their POS tags from it. """ verb = None verb2 = None vpos = None vpos2 = None pos_found = False for (tok, pos, ts) in s: if (pos.upper() == 'VBP' or pos.upper() == 'VBZ' or pos.upper() == 'VBD' or pos.upper() == 'MD') and \ not pos_found: verb = tok.lower() vpos = pos.upper() elif pos_found and tok[:2].upper() == 'VB': verb2 = tok.lower() vpos2 = pos.upper() break if vpos == 'VBP' or vpos == 'VBZ' or vpos == 'MD' and \ re.search(r'going\s+to', ' '.join([tok for (tok, pos, ts) in s]), re.I) is not None: vpos = 'MD' verb = 'going_to' return (verb, vpos, verb2, vpos2)
def relative_direction_heuristic(before, after):
    """
    Given what precedes and what follows a TIMEX, use heuristics based on
    tense to compute which direction a relative expression is in. Converted
    from GUTime.

    before -- sequence of token triples preceding the TIMEX. The first
              element of a triple is the token text; a non-empty third
              element is taken to mean the token belongs to an earlier TIMEX.
              May be empty.
    after  -- sequence of token triples following the TIMEX.

    Returns -1 for the past, 1 for the future and 0 if no direction could be
    determined.
    """

    # Get the bit after the last TIMEX and before this one. Index 0 does not
    # need checking: a cut there would give the whole of before, which is
    # what lead defaults to anyway.
    lead = before
    for i in range(-1, -1 * len(before), -1):
        if len(before[i][2]) > 0:
            lead = before[i:]
            break

    # Okay, now extract the verbs, trying each span in turn until one of them
    # gives up a verb
    for span in (lead, after, before):
        (verb, pos, verb2, pos2) = _extract_verbs(span)
        if verb is not None:
            break
    else:
        return 0

    # Now try and figure out a relative direction based on the verb information
    if pos == 'VBD':
        return -1
    elif pos == 'MD':
        if re.search(r'(will|\'ll|going_to)', verb, re.I) is not None:
            return 1
        elif verb2 == 'have':
            return -1
        elif re.search(r'((w|c|sh)ould|\'d)', verb, re.I) is not None and pos2 == 'VB':
            return 1

    # Use other linguistic cues to determine tense. There is no preceding
    # word to look at when the TIMEX has nothing before it.
    if before:
        cue = before[-1][0].lower()
        if cue == 'since':
            return -1
        elif cue == 'until':
            return 1

    return 0