Fix missing days in 2007, 2011

This commit is contained in:
NateScarlet 2019-03-09 19:35:24 +08:00
parent 56f9e396c8
commit 73becdc0c9
No known key found for this signature in database
GPG Key ID: 5C242793B070309C
5 changed files with 158 additions and 82 deletions

View File

@ -29,6 +29,11 @@
"date": "2007-01-03",
"isOffDay": true
},
{
"name": "春节",
"date": "2007-02-17",
"isOffDay": false
},
{
"name": "春节",
"date": "2007-02-18",
@ -64,6 +69,11 @@
"date": "2007-02-24",
"isOffDay": true
},
{
"name": "春节",
"date": "2007-02-25",
"isOffDay": false
},
{
"name": "“五一”",
"date": "2007-04-28",

View File

@ -29,6 +29,11 @@
"date": "2011-02-02",
"isOffDay": true
},
{
"name": "春节",
"date": "2011-02-08",
"isOffDay": true
},
{
"name": "春节",
"date": "2011-02-12",

View File

@ -422,5 +422,61 @@
{ "date": "2008-09-27", "isOffDay": false },
{ "date": "2008-09-28", "isOffDay": false }
]
},
{
"year": 2007,
"description": "1月1日—3日放假共3天。其中1月1日为法定假日将2006年12月30日星期六)、31日星期日两个公休日分别调至2007年1月2日、3日2006年12月30日星期六)、31日星期日上班。",
"expected": [
{ "date": "2007-01-01", "isOffDay": true },
{ "date": "2007-01-02", "isOffDay": true },
{ "date": "2007-01-03", "isOffDay": true },
{ "date": "2006-12-30", "isOffDay": false },
{ "date": "2006-12-31", "isOffDay": false }
]
},
{
"year": 2007,
"description": "2月18日—24日即农历大年初一至初七放假共7天。其中18日、19日、20日为法定假日将17日星期六)、18日星期日)、25日星期日三个公休日分别调至21日星期三)、22日星期四)、23日星期五)24日星期六照常公休17日、25日上班。",
"expected": [
{ "date": "2007-02-18", "isOffDay": true },
{ "date": "2007-02-19", "isOffDay": true },
{ "date": "2007-02-20", "isOffDay": true },
{ "date": "2007-02-21", "isOffDay": true },
{ "date": "2007-02-22", "isOffDay": true },
{ "date": "2007-02-23", "isOffDay": true },
{ "date": "2007-02-24", "isOffDay": true },
{ "date": "2007-02-17", "isOffDay": false },
{ "date": "2007-02-25", "isOffDay": false }
]
},
{
"year": 2007,
"description": "5月1日—7日放假共7天。其中1日、2日、3日为法定假日将4月28日星期六)、29日星期日两个公休日调至5月4日星期五)、7日星期一)5月5日星期六)、6日星期日照常公休4月28日、29日上班。",
"expected": [
{ "date": "2007-05-01", "isOffDay": true },
{ "date": "2007-05-02", "isOffDay": true },
{ "date": "2007-05-03", "isOffDay": true },
{ "date": "2007-05-04", "isOffDay": true },
{ "date": "2007-05-05", "isOffDay": true },
{ "date": "2007-05-06", "isOffDay": true },
{ "date": "2007-05-07", "isOffDay": true },
{ "date": "2007-04-28", "isOffDay": false },
{ "date": "2007-04-29", "isOffDay": false }
]
},
{
"year": 2007,
"description": "10月1日—7日放假共7天。其中1日、2日、3日为法定假日将9月29日星期六)、30日星期日两个公休日调至10月4日星期四)、5日星期五)10月6日星期六)、7日星期日照常公休9月29日、30日上班。",
"expected": [
{ "date": "2007-10-01", "isOffDay": true },
{ "date": "2007-10-02", "isOffDay": true },
{ "date": "2007-10-03", "isOffDay": true },
{ "date": "2007-10-04", "isOffDay": true },
{ "date": "2007-10-05", "isOffDay": true },
{ "date": "2007-10-06", "isOffDay": true },
{ "date": "2007-10-07", "isOffDay": true },
{ "date": "2007-09-29", "isOffDay": false },
{ "date": "2007-09-30", "isOffDay": false }
]
}
]

View File

@ -86,13 +86,70 @@ def _cast_int(value):
return int(value) if value else None
class DescriptionParser:
    """Parser for holiday shift description.

    The description is split into sentences, each of which is handed to
    ``SentenceParser`` together with this parser.  Dates recorded through
    :meth:`memorize_date` are kept in ``date_history`` (in insertion order)
    so that :meth:`get_date` can resolve dates written without a month or
    year from the dates seen before them.
    """

    def __init__(self, description: str, year: int):
        self.description = description
        self.year = year
        # Every date recorded so far, oldest first (duplicates allowed).
        self.date_history = []

    def memorize_date(self, value: date):
        """Append ``value`` to the date history."""
        self.date_history.append(value)

    def clear_memory(self):
        """Empty the date history in place."""
        self.date_history.clear()

    def parse(self) -> Iterator[dict]:
        """Generator for description parsing result.

        The date history is reset first, so the parser can be reused.

        Yields:
            dict: Items produced by ``SentenceParser.parse`` for each
                sentence of the description.

        Raises:
            NotImplementedError: If no date was recorded for the whole
                description.
        """
        self.clear_memory()
        for sentence in re.split('[,。;]', self.description):
            yield from SentenceParser(self, sentence).parse()

        if not self.date_history:
            raise NotImplementedError(self.description)

    def get_date(self, year: Optional[int], month: Optional[int], day: int) -> date:
        """Get date in context.

        Args:
            year (Optional[int]): Year, or None to infer it from context.
            month (Optional[int]): Month, or None to inherit the month of
                the most recently memorized date.
            day (int): Day of month.

        Returns:
            date: Date result

        Raises:
            NotImplementedError: If ``month`` is None and no date has been
                memorized yet, so there is nothing to inherit from.
        """
        assert day, 'No day specified'

        # Special case: month inherit
        if month is None:
            if not self.date_history:
                # Previously an opaque IndexError; report it the same way
                # other unparseable input is reported by this parser.
                raise NotImplementedError(self.description)
            month = self.date_history[-1].month

        # Special case: 12 month may mean previous year
        # (only when every date seen so far is before February of the
        # context year).
        if (year is None
                and month == 12
                and self.date_history
                and max(self.date_history) < date(year=self.year, month=2, day=1)):
            year = self.year - 1

        return date(year=year or self.year, month=month, day=day)
class SentenceParser:
"""Parser for holiday shift description sentence. """
def __init__(self, sentence, year):
def __init__(self, parent: DescriptionParser, sentence):
self.parent = parent
self.sentence = sentence
self.year = year
self._date_memory = set()
def extract_dates(self, text: str) -> Iterator[date]:
"""Extract date from text.
@ -105,77 +162,49 @@ class SentenceParser:
"""
count = 0
text = text.replace('(', '').replace(')', '')
for method in self.date_extraction_methods:
for i in method(self, text):
count += 1
if i in self._date_memory:
is_seen = i in self.parent.date_history
self.parent.memorize_date(i)
if is_seen:
continue
self._date_memory.add(i)
yield i
if not count:
raise NotImplementedError(text)
def get_date(self, year: Optional[int], month: int, day: int) -> date:
"""Get date in context.
Args:
year (Optional[int]): year
month (int): month
day (int): day
Returns:
date: Date result
"""
# Special case: 12 month may mean previous year
if (year is None
and month == 12
and self._date_memory
and max(self._date_memory) < date(year=self.year, month=2, day=1)):
year = self.year - 1
year = year or self.year
return date(year=year, month=month, day=day)
def _extract_dates_1(self, value):
match = re.match(r'(?:(\d+)年)?(?:(\d+)月)(\d+)日', value)
if match:
groups = [_cast_int(i) for i in match.groups()]
match = re.findall(r'(?:(\d+)年)?(?:(\d+)月)?(\d+)日', value)
for groups in match:
groups = [_cast_int(i) for i in groups]
assert len(groups) == 3, groups
yield self.get_date(year=groups[0], month=groups[1], day=groups[2])
yield self.parent.get_date(year=groups[0], month=groups[1], day=groups[2])
def _extract_dates_2(self, value):
match = re.match(
r'(?:(\d+)年)?(?:(\d+)月)(\d+)日(?:至|-|—)(?:(\d+)年)?(?:(\d+)月)?(\d+)日', value)
if match:
groups = [_cast_int(i) for i in match.groups()]
match = re.findall(
r'(?:(\d+)年)?(?:(\d+)月)?(\d+)日(?:至|-|—)(?:(\d+)年)?(?:(\d+)月)?(\d+)日', value)
for groups in match:
groups = [_cast_int(i) for i in groups]
assert len(groups) == 6, groups
start = self.get_date(year=groups[0],
month=groups[1], day=groups[2])
end = self.get_date(year=groups[3],
month=groups[4] or groups[1], day=groups[5])
start = self.parent.get_date(year=groups[0],
month=groups[1], day=groups[2])
end = self.parent.get_date(year=groups[3],
month=groups[4], day=groups[5])
for i in range((end - start).days + 1):
yield start + timedelta(days=i)
def _extract_dates_3(self, value):
match = re.match(
r'(?:(\d+)年)?(?:(\d+)月)(\d+)日(?:[^]+)?'
match = re.findall(
r'(?:(\d+)年)?(?:(\d+)月)?(\d+)日(?:[^]+)?'
r'(?:、(?:(\d+)年)?(?:(\d+)月)?(\d+)日(?:[^]+)?)+',
value.replace('(', '').replace(')', ''))
if match:
groups = [_cast_int(i) for i in match.groups()]
value)
for groups in match:
groups = [_cast_int(i) for i in groups]
assert not (len(groups) % 3), groups
year = self.year
month = None
day = None
for i in range(0, len(groups), 3):
year = groups[i]
month = groups[i+1] or month
day = groups[i+2]
assert month
assert day
yield self.get_date(year=year, month=month, day=day)
yield self.parent.get_date(year=groups[i], month=groups[i+1], day=groups[i+2])
date_extraction_methods = [
_extract_dates_1,
@ -183,7 +212,7 @@ class SentenceParser:
_extract_dates_3
]
def parse(self, memory: set) -> Iterator[dict]:
def parse(self) -> Iterator[dict]:
"""Parse days with memory
Args:
@ -193,7 +222,6 @@ class SentenceParser:
Iterator[dict]: Days without name field.
"""
self._date_memory = memory
for method in self.parsing_methods:
for i in method(self):
yield i
@ -217,7 +245,7 @@ class SentenceParser:
}
def _parse_shift_1(self):
match = re.match('(.+)公休日调至(.+)', self.sentence)
match = re.match('(.+)调至(.+)', self.sentence)
if match:
for i in self.extract_dates(match.group(1)):
yield {
@ -237,29 +265,6 @@ class SentenceParser:
]
class DescriptionParser:
"""Parser for holiday shift description. """
def __init__(self, description):
self.description = description
self._date_memory = set()
def parse(self, year: int) -> Iterator[dict]:
"""Generator for description parsing result.
Args:
year (int): Context year
"""
self._date_memory.clear()
for i in re.split('|。', self.description):
for j in SentenceParser(i, year).parse(self._date_memory):
yield j
if not self._date_memory:
raise NotImplementedError(self.description)
def fetch_holiday(year: int):
"""Fetch holiday data. """
@ -273,7 +278,7 @@ def fetch_holiday(year: int):
days.extend({
'name': name,
**j
} for j in DescriptionParser(description).parse(year))
} for j in DescriptionParser(description, year).parse())
return {
'year': year,
'papers': papers,

View File

@ -17,8 +17,8 @@ def _generate_tests():
def create_test(case):
def _test():
year, description, expected = case['year'], case['description'], case['expected']
assert _normalize(DescriptionParser(description)
.parse(year)) == _normalize(expected), case
assert _normalize(DescriptionParser(
description, year).parse()) == _normalize(expected), case
return _test
for index, case in enumerate(cases, 1):