diff --git a/description_parsing_cases.json b/description_parsing_cases.json index 97b2e73..b22ad5d 100644 --- a/description_parsing_cases.json +++ b/description_parsing_cases.json @@ -6,7 +6,7 @@ { "date": "2018-12-30", "isOffDay": true }, { "date": "2018-12-31", "isOffDay": true }, { "date": "2019-01-01", "isOffDay": true }, - { "date": "2019-12-29", "isOffDay": false } + { "date": "2018-12-29", "isOffDay": false } ] }, { @@ -19,6 +19,7 @@ { "date": "2019-02-07", "isOffDay": true }, { "date": "2019-02-08", "isOffDay": true }, { "date": "2019-02-09", "isOffDay": true }, + { "date": "2019-02-10", "isOffDay": true }, { "date": "2019-02-02", "isOffDay": false }, { "date": "2019-02-03", "isOffDay": false } ] @@ -213,7 +214,7 @@ { "date": "2007-12-30", "isOffDay": true }, { "date": "2007-12-31", "isOffDay": true }, { "date": "2008-01-01", "isOffDay": true }, - { "date": "2019-12-29", "isOffDay": false } + { "date": "2007-12-29", "isOffDay": false } ] }, { diff --git a/fetch_holidays.py b/fetch_holidays.py index f550f16..897f270 100644 --- a/fetch_holidays.py +++ b/fetch_holidays.py @@ -5,7 +5,7 @@ import argparse import json import re from datetime import date, timedelta -from typing import List +from typing import List, Optional import bs4 import requests @@ -76,13 +76,18 @@ class SentenceParser: memory.add(i) yield i + def get_date(self, year: Optional[int], month: int, day: int) -> date: + if year is None and month > 10: + year = self.year - 1 + year = year or self.year + return date(year=year, month=month, day=day) + def _extract_dates_1(self, value): match = re.match(r'(?:(\d+)年)?(?:(\d+)月)(\d+)日', value) if match: groups = [_cast_int(i) for i in match.groups()] assert len(groups) == 3, groups - yield date(year=groups[0] or self.year, - month=groups[1], day=groups[2]) + yield self.get_date(year=groups[0], month=groups[1], day=groups[2]) def _extract_dates_2(self, value): match = re.match( @@ -90,10 +95,10 @@ class SentenceParser: if match: groups = [_cast_int(i) for i in match.groups()] assert len(groups) == 6, groups - start = date(year=groups[0] or self.year, - month=groups[1], day=groups[2]) - end = date(year=groups[3] or self.year, - month=groups[4] or groups[1], day=groups[5]) + start = self.get_date(year=groups[0], + month=groups[1], day=groups[2]) + end = self.get_date(year=groups[3], + month=groups[4] or groups[1], day=groups[5]) for i in range((end - start).days + 1): yield start + timedelta(days=i) @@ -107,13 +112,12 @@ class SentenceParser: month = None day = None for i in range(0, len(groups), 3): - year = groups[i] or year + year = groups[i] month = groups[i+1] or month day = groups[i+2] - assert year assert month assert day - yield date(year=year, month=month, day=day) + yield self.get_date(year=year, month=month, day=day) date_extraction_methods = [ _extract_dates_1, @@ -149,7 +153,7 @@ class SentenceParser: 'isOffDay': False } - def _parse_work_2(self): + def _parse_shift_1(self): match = re.match('(.+)公休日调至(.+)', self.sentence) if match: for i in self.extract_dates(match.group(1)): @@ -166,7 +170,7 @@ class SentenceParser: parsing_methods = [ _parse_rest_1, _parse_work_1, - _parse_work_2, + _parse_shift_1, ]