Fix missiong days in 2007, 2011
This commit is contained in:
parent
56f9e396c8
commit
73becdc0c9
10
2007.json
10
2007.json
|
|
@ -29,6 +29,11 @@
|
|||
"date": "2007-01-03",
|
||||
"isOffDay": true
|
||||
},
|
||||
{
|
||||
"name": "春节",
|
||||
"date": "2007-02-17",
|
||||
"isOffDay": false
|
||||
},
|
||||
{
|
||||
"name": "春节",
|
||||
"date": "2007-02-18",
|
||||
|
|
@ -64,6 +69,11 @@
|
|||
"date": "2007-02-24",
|
||||
"isOffDay": true
|
||||
},
|
||||
{
|
||||
"name": "春节",
|
||||
"date": "2007-02-25",
|
||||
"isOffDay": false
|
||||
},
|
||||
{
|
||||
"name": "“五一”",
|
||||
"date": "2007-04-28",
|
||||
|
|
|
|||
|
|
@ -29,6 +29,11 @@
|
|||
"date": "2011-02-02",
|
||||
"isOffDay": true
|
||||
},
|
||||
{
|
||||
"name": "春节",
|
||||
"date": "2011-02-08",
|
||||
"isOffDay": true
|
||||
},
|
||||
{
|
||||
"name": "春节",
|
||||
"date": "2011-02-12",
|
||||
|
|
|
|||
|
|
@ -422,5 +422,61 @@
|
|||
{ "date": "2008-09-27", "isOffDay": false },
|
||||
{ "date": "2008-09-28", "isOffDay": false }
|
||||
]
|
||||
},
|
||||
{
|
||||
"year": 2007,
|
||||
"description": "1月1日—3日放假,共3天。其中1月1日为法定假日,将2006年12月30日(星期六)、31日(星期日)两个公休日分别调至2007年1月2日、3日,2006年12月30日(星期六)、31日(星期日)上班。",
|
||||
"expected": [
|
||||
{ "date": "2007-01-01", "isOffDay": true },
|
||||
{ "date": "2007-01-02", "isOffDay": true },
|
||||
{ "date": "2007-01-03", "isOffDay": true },
|
||||
{ "date": "2006-12-30", "isOffDay": false },
|
||||
{ "date": "2006-12-31", "isOffDay": false }
|
||||
]
|
||||
},
|
||||
{
|
||||
"year": 2007,
|
||||
"description": "2月18日—24日(即农历大年初一至初七)放假,共7天。其中18日、19日、20日为法定假日,将17日(星期六)、18日(星期日)、25日(星期日)三个公休日分别调至21日(星期三)、22日(星期四)、23日(星期五);24日(星期六)照常公休,17日、25日上班。",
|
||||
"expected": [
|
||||
{ "date": "2007-02-18", "isOffDay": true },
|
||||
{ "date": "2007-02-19", "isOffDay": true },
|
||||
{ "date": "2007-02-20", "isOffDay": true },
|
||||
{ "date": "2007-02-21", "isOffDay": true },
|
||||
{ "date": "2007-02-22", "isOffDay": true },
|
||||
{ "date": "2007-02-23", "isOffDay": true },
|
||||
{ "date": "2007-02-24", "isOffDay": true },
|
||||
{ "date": "2007-02-17", "isOffDay": false },
|
||||
{ "date": "2007-02-25", "isOffDay": false }
|
||||
]
|
||||
},
|
||||
{
|
||||
"year": 2007,
|
||||
"description": "5月1日—7日放假,共7天。其中,1日、2日、3日为法定假日,将4月28日(星期六)、29日(星期日)两个公休日调至5月4日(星期五)、7日(星期一);5月5日(星期六)、6日(星期日)照常公休,4月28日、29日上班。",
|
||||
"expected": [
|
||||
{ "date": "2007-05-01", "isOffDay": true },
|
||||
{ "date": "2007-05-02", "isOffDay": true },
|
||||
{ "date": "2007-05-03", "isOffDay": true },
|
||||
{ "date": "2007-05-04", "isOffDay": true },
|
||||
{ "date": "2007-05-05", "isOffDay": true },
|
||||
{ "date": "2007-05-06", "isOffDay": true },
|
||||
{ "date": "2007-05-07", "isOffDay": true },
|
||||
{ "date": "2007-04-28", "isOffDay": false },
|
||||
{ "date": "2007-04-29", "isOffDay": false }
|
||||
]
|
||||
},
|
||||
{
|
||||
"year": 2007,
|
||||
"description": "10月1日—7日放假,共7天。其中,1日、2日、3日为法定假日,将9月29日(星期六)、30日(星期日)两个公休日调至10月4日(星期四)、5日(星期五);10月6日(星期六)、7日(星期日)照常公休,9月29日、30日上班。",
|
||||
"expected": [
|
||||
{ "date": "2007-10-01", "isOffDay": true },
|
||||
{ "date": "2007-10-02", "isOffDay": true },
|
||||
{ "date": "2007-10-03", "isOffDay": true },
|
||||
{ "date": "2007-10-04", "isOffDay": true },
|
||||
{ "date": "2007-10-05", "isOffDay": true },
|
||||
{ "date": "2007-10-06", "isOffDay": true },
|
||||
{ "date": "2007-10-07", "isOffDay": true },
|
||||
{ "date": "2007-09-29", "isOffDay": false },
|
||||
{ "date": "2007-09-30", "isOffDay": false }
|
||||
]
|
||||
}
|
||||
]
|
||||
|
|
|
|||
|
|
@ -86,13 +86,70 @@ def _cast_int(value):
|
|||
return int(value) if value else None
|
||||
|
||||
|
||||
class DescriptionParser:
|
||||
"""Parser for holiday shift description. """
|
||||
|
||||
def __init__(self, description: str, year: int):
|
||||
self.description = description
|
||||
self.year = year
|
||||
self.date_history = list()
|
||||
|
||||
def memorize_date(self, value: date):
|
||||
self.date_history.append(value)
|
||||
|
||||
def clear_memory(self):
|
||||
del self.date_history[:]
|
||||
|
||||
def parse(self) -> Iterator[dict]:
|
||||
"""Generator for description parsing result.
|
||||
|
||||
Args:
|
||||
year (int): Context year
|
||||
"""
|
||||
|
||||
self.clear_memory()
|
||||
for i in re.split('[,。;]', self.description):
|
||||
for j in SentenceParser(self, i).parse():
|
||||
yield j
|
||||
|
||||
if not self.date_history:
|
||||
raise NotImplementedError(self.description)
|
||||
|
||||
def get_date(self, year: Optional[int], month: Optional[int], day: int) -> date:
|
||||
"""Get date in context.
|
||||
|
||||
Args:
|
||||
year (Optional[int]): year
|
||||
month (int): month
|
||||
day (int): day
|
||||
|
||||
Returns:
|
||||
date: Date result
|
||||
"""
|
||||
|
||||
assert day, 'No day specified'
|
||||
|
||||
# Special case: month inherit
|
||||
if month is None:
|
||||
month = self.date_history[-1].month
|
||||
|
||||
# Special case: 12 month may mean previous year
|
||||
if (year is None
|
||||
and month == 12
|
||||
and self.date_history
|
||||
and max(self.date_history) < date(year=self.year, month=2, day=1)):
|
||||
year = self.year - 1
|
||||
|
||||
year = year or self.year
|
||||
return date(year=year, month=month, day=day)
|
||||
|
||||
|
||||
class SentenceParser:
|
||||
"""Parser for holiday shift description sentence. """
|
||||
|
||||
def __init__(self, sentence, year):
|
||||
def __init__(self, parent: DescriptionParser, sentence):
|
||||
self.parent = parent
|
||||
self.sentence = sentence
|
||||
self.year = year
|
||||
self._date_memory = set()
|
||||
|
||||
def extract_dates(self, text: str) -> Iterator[date]:
|
||||
"""Extract date from text.
|
||||
|
|
@ -105,77 +162,49 @@ class SentenceParser:
|
|||
"""
|
||||
|
||||
count = 0
|
||||
text = text.replace('(', '(').replace(')', ')')
|
||||
for method in self.date_extraction_methods:
|
||||
for i in method(self, text):
|
||||
count += 1
|
||||
if i in self._date_memory:
|
||||
is_seen = i in self.parent.date_history
|
||||
self.parent.memorize_date(i)
|
||||
if is_seen:
|
||||
continue
|
||||
self._date_memory.add(i)
|
||||
yield i
|
||||
|
||||
if not count:
|
||||
raise NotImplementedError(text)
|
||||
|
||||
def get_date(self, year: Optional[int], month: int, day: int) -> date:
|
||||
"""Get date in context.
|
||||
|
||||
Args:
|
||||
year (Optional[int]): year
|
||||
month (int): month
|
||||
day (int): day
|
||||
|
||||
Returns:
|
||||
date: Date result
|
||||
"""
|
||||
|
||||
# Special case: 12 month may mean previous year
|
||||
if (year is None
|
||||
and month == 12
|
||||
and self._date_memory
|
||||
and max(self._date_memory) < date(year=self.year, month=2, day=1)):
|
||||
year = self.year - 1
|
||||
|
||||
year = year or self.year
|
||||
return date(year=year, month=month, day=day)
|
||||
|
||||
def _extract_dates_1(self, value):
|
||||
match = re.match(r'(?:(\d+)年)?(?:(\d+)月)(\d+)日', value)
|
||||
if match:
|
||||
groups = [_cast_int(i) for i in match.groups()]
|
||||
match = re.findall(r'(?:(\d+)年)?(?:(\d+)月)?(\d+)日', value)
|
||||
for groups in match:
|
||||
groups = [_cast_int(i) for i in groups]
|
||||
assert len(groups) == 3, groups
|
||||
yield self.get_date(year=groups[0], month=groups[1], day=groups[2])
|
||||
yield self.parent.get_date(year=groups[0], month=groups[1], day=groups[2])
|
||||
|
||||
def _extract_dates_2(self, value):
|
||||
match = re.match(
|
||||
r'(?:(\d+)年)?(?:(\d+)月)(\d+)日(?:至|-|—)(?:(\d+)年)?(?:(\d+)月)?(\d+)日', value)
|
||||
if match:
|
||||
groups = [_cast_int(i) for i in match.groups()]
|
||||
match = re.findall(
|
||||
r'(?:(\d+)年)?(?:(\d+)月)?(\d+)日(?:至|-|—)(?:(\d+)年)?(?:(\d+)月)?(\d+)日', value)
|
||||
for groups in match:
|
||||
groups = [_cast_int(i) for i in groups]
|
||||
assert len(groups) == 6, groups
|
||||
start = self.get_date(year=groups[0],
|
||||
month=groups[1], day=groups[2])
|
||||
end = self.get_date(year=groups[3],
|
||||
month=groups[4] or groups[1], day=groups[5])
|
||||
start = self.parent.get_date(year=groups[0],
|
||||
month=groups[1], day=groups[2])
|
||||
end = self.parent.get_date(year=groups[3],
|
||||
month=groups[4], day=groups[5])
|
||||
for i in range((end - start).days + 1):
|
||||
yield start + timedelta(days=i)
|
||||
|
||||
def _extract_dates_3(self, value):
|
||||
match = re.match(
|
||||
r'(?:(\d+)年)?(?:(\d+)月)(\d+)日(?:([^)]+))?'
|
||||
match = re.findall(
|
||||
r'(?:(\d+)年)?(?:(\d+)月)?(\d+)日(?:([^)]+))?'
|
||||
r'(?:、(?:(\d+)年)?(?:(\d+)月)?(\d+)日(?:([^)]+))?)+',
|
||||
value.replace('(', '(').replace(')', ')'))
|
||||
if match:
|
||||
groups = [_cast_int(i) for i in match.groups()]
|
||||
value)
|
||||
for groups in match:
|
||||
groups = [_cast_int(i) for i in groups]
|
||||
assert not (len(groups) % 3), groups
|
||||
year = self.year
|
||||
month = None
|
||||
day = None
|
||||
for i in range(0, len(groups), 3):
|
||||
year = groups[i]
|
||||
month = groups[i+1] or month
|
||||
day = groups[i+2]
|
||||
assert month
|
||||
assert day
|
||||
yield self.get_date(year=year, month=month, day=day)
|
||||
yield self.parent.get_date(year=groups[i], month=groups[i+1], day=groups[i+2])
|
||||
|
||||
date_extraction_methods = [
|
||||
_extract_dates_1,
|
||||
|
|
@ -183,7 +212,7 @@ class SentenceParser:
|
|||
_extract_dates_3
|
||||
]
|
||||
|
||||
def parse(self, memory: set) -> Iterator[dict]:
|
||||
def parse(self) -> Iterator[dict]:
|
||||
"""Parse days with memory
|
||||
|
||||
Args:
|
||||
|
|
@ -193,7 +222,6 @@ class SentenceParser:
|
|||
Iterator[dict]: Days without name field.
|
||||
"""
|
||||
|
||||
self._date_memory = memory
|
||||
for method in self.parsing_methods:
|
||||
for i in method(self):
|
||||
yield i
|
||||
|
|
@ -217,7 +245,7 @@ class SentenceParser:
|
|||
}
|
||||
|
||||
def _parse_shift_1(self):
|
||||
match = re.match('(.+)公休日调至(.+)', self.sentence)
|
||||
match = re.match('(.+)调至(.+)', self.sentence)
|
||||
if match:
|
||||
for i in self.extract_dates(match.group(1)):
|
||||
yield {
|
||||
|
|
@ -237,29 +265,6 @@ class SentenceParser:
|
|||
]
|
||||
|
||||
|
||||
class DescriptionParser:
|
||||
"""Parser for holiday shift description. """
|
||||
|
||||
def __init__(self, description):
|
||||
self.description = description
|
||||
self._date_memory = set()
|
||||
|
||||
def parse(self, year: int) -> Iterator[dict]:
|
||||
"""Generator for description parsing result.
|
||||
|
||||
Args:
|
||||
year (int): Context year
|
||||
"""
|
||||
|
||||
self._date_memory.clear()
|
||||
for i in re.split(',|。', self.description):
|
||||
for j in SentenceParser(i, year).parse(self._date_memory):
|
||||
yield j
|
||||
|
||||
if not self._date_memory:
|
||||
raise NotImplementedError(self.description)
|
||||
|
||||
|
||||
def fetch_holiday(year: int):
|
||||
"""Fetch holiday data. """
|
||||
|
||||
|
|
@ -273,7 +278,7 @@ def fetch_holiday(year: int):
|
|||
days.extend({
|
||||
'name': name,
|
||||
**j
|
||||
} for j in DescriptionParser(description).parse(year))
|
||||
} for j in DescriptionParser(description, year).parse())
|
||||
return {
|
||||
'year': year,
|
||||
'papers': papers,
|
||||
|
|
|
|||
|
|
@ -17,8 +17,8 @@ def _generate_tests():
|
|||
def create_test(case):
|
||||
def _test():
|
||||
year, description, expected = case['year'], case['description'], case['expected']
|
||||
assert _normalize(DescriptionParser(description)
|
||||
.parse(year)) == _normalize(expected), case
|
||||
assert _normalize(DescriptionParser(
|
||||
description, year).parse()) == _normalize(expected), case
|
||||
return _test
|
||||
|
||||
for index, case in enumerate(cases, 1):
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user