Fix missing days in 2007, 2011
This commit is contained in:
parent
56f9e396c8
commit
73becdc0c9
10
2007.json
10
2007.json
|
|
@ -29,6 +29,11 @@
|
||||||
"date": "2007-01-03",
|
"date": "2007-01-03",
|
||||||
"isOffDay": true
|
"isOffDay": true
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "春节",
|
||||||
|
"date": "2007-02-17",
|
||||||
|
"isOffDay": false
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "春节",
|
"name": "春节",
|
||||||
"date": "2007-02-18",
|
"date": "2007-02-18",
|
||||||
|
|
@ -64,6 +69,11 @@
|
||||||
"date": "2007-02-24",
|
"date": "2007-02-24",
|
||||||
"isOffDay": true
|
"isOffDay": true
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "春节",
|
||||||
|
"date": "2007-02-25",
|
||||||
|
"isOffDay": false
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "“五一”",
|
"name": "“五一”",
|
||||||
"date": "2007-04-28",
|
"date": "2007-04-28",
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,11 @@
|
||||||
"date": "2011-02-02",
|
"date": "2011-02-02",
|
||||||
"isOffDay": true
|
"isOffDay": true
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "春节",
|
||||||
|
"date": "2011-02-08",
|
||||||
|
"isOffDay": true
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "春节",
|
"name": "春节",
|
||||||
"date": "2011-02-12",
|
"date": "2011-02-12",
|
||||||
|
|
|
||||||
|
|
@ -422,5 +422,61 @@
|
||||||
{ "date": "2008-09-27", "isOffDay": false },
|
{ "date": "2008-09-27", "isOffDay": false },
|
||||||
{ "date": "2008-09-28", "isOffDay": false }
|
{ "date": "2008-09-28", "isOffDay": false }
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"year": 2007,
|
||||||
|
"description": "1月1日—3日放假,共3天。其中1月1日为法定假日,将2006年12月30日(星期六)、31日(星期日)两个公休日分别调至2007年1月2日、3日,2006年12月30日(星期六)、31日(星期日)上班。",
|
||||||
|
"expected": [
|
||||||
|
{ "date": "2007-01-01", "isOffDay": true },
|
||||||
|
{ "date": "2007-01-02", "isOffDay": true },
|
||||||
|
{ "date": "2007-01-03", "isOffDay": true },
|
||||||
|
{ "date": "2006-12-30", "isOffDay": false },
|
||||||
|
{ "date": "2006-12-31", "isOffDay": false }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"year": 2007,
|
||||||
|
"description": "2月18日—24日(即农历大年初一至初七)放假,共7天。其中18日、19日、20日为法定假日,将17日(星期六)、18日(星期日)、25日(星期日)三个公休日分别调至21日(星期三)、22日(星期四)、23日(星期五);24日(星期六)照常公休,17日、25日上班。",
|
||||||
|
"expected": [
|
||||||
|
{ "date": "2007-02-18", "isOffDay": true },
|
||||||
|
{ "date": "2007-02-19", "isOffDay": true },
|
||||||
|
{ "date": "2007-02-20", "isOffDay": true },
|
||||||
|
{ "date": "2007-02-21", "isOffDay": true },
|
||||||
|
{ "date": "2007-02-22", "isOffDay": true },
|
||||||
|
{ "date": "2007-02-23", "isOffDay": true },
|
||||||
|
{ "date": "2007-02-24", "isOffDay": true },
|
||||||
|
{ "date": "2007-02-17", "isOffDay": false },
|
||||||
|
{ "date": "2007-02-25", "isOffDay": false }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"year": 2007,
|
||||||
|
"description": "5月1日—7日放假,共7天。其中,1日、2日、3日为法定假日,将4月28日(星期六)、29日(星期日)两个公休日调至5月4日(星期五)、7日(星期一);5月5日(星期六)、6日(星期日)照常公休,4月28日、29日上班。",
|
||||||
|
"expected": [
|
||||||
|
{ "date": "2007-05-01", "isOffDay": true },
|
||||||
|
{ "date": "2007-05-02", "isOffDay": true },
|
||||||
|
{ "date": "2007-05-03", "isOffDay": true },
|
||||||
|
{ "date": "2007-05-04", "isOffDay": true },
|
||||||
|
{ "date": "2007-05-05", "isOffDay": true },
|
||||||
|
{ "date": "2007-05-06", "isOffDay": true },
|
||||||
|
{ "date": "2007-05-07", "isOffDay": true },
|
||||||
|
{ "date": "2007-04-28", "isOffDay": false },
|
||||||
|
{ "date": "2007-04-29", "isOffDay": false }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"year": 2007,
|
||||||
|
"description": "10月1日—7日放假,共7天。其中,1日、2日、3日为法定假日,将9月29日(星期六)、30日(星期日)两个公休日调至10月4日(星期四)、5日(星期五);10月6日(星期六)、7日(星期日)照常公休,9月29日、30日上班。",
|
||||||
|
"expected": [
|
||||||
|
{ "date": "2007-10-01", "isOffDay": true },
|
||||||
|
{ "date": "2007-10-02", "isOffDay": true },
|
||||||
|
{ "date": "2007-10-03", "isOffDay": true },
|
||||||
|
{ "date": "2007-10-04", "isOffDay": true },
|
||||||
|
{ "date": "2007-10-05", "isOffDay": true },
|
||||||
|
{ "date": "2007-10-06", "isOffDay": true },
|
||||||
|
{ "date": "2007-10-07", "isOffDay": true },
|
||||||
|
{ "date": "2007-09-29", "isOffDay": false },
|
||||||
|
{ "date": "2007-09-30", "isOffDay": false }
|
||||||
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -86,13 +86,70 @@ def _cast_int(value):
|
||||||
return int(value) if value else None
|
return int(value) if value else None
|
||||||
|
|
||||||
|
|
||||||
|
class DescriptionParser:
|
||||||
|
"""Parser for holiday shift description. """
|
||||||
|
|
||||||
|
def __init__(self, description: str, year: int):
|
||||||
|
self.description = description
|
||||||
|
self.year = year
|
||||||
|
self.date_history = list()
|
||||||
|
|
||||||
|
def memorize_date(self, value: date):
|
||||||
|
self.date_history.append(value)
|
||||||
|
|
||||||
|
def clear_memory(self):
|
||||||
|
del self.date_history[:]
|
||||||
|
|
||||||
|
def parse(self) -> Iterator[dict]:
|
||||||
|
"""Generator for description parsing result.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
year (int): Context year
|
||||||
|
"""
|
||||||
|
|
||||||
|
self.clear_memory()
|
||||||
|
for i in re.split('[,。;]', self.description):
|
||||||
|
for j in SentenceParser(self, i).parse():
|
||||||
|
yield j
|
||||||
|
|
||||||
|
if not self.date_history:
|
||||||
|
raise NotImplementedError(self.description)
|
||||||
|
|
||||||
|
def get_date(self, year: Optional[int], month: Optional[int], day: int) -> date:
|
||||||
|
"""Get date in context.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
year (Optional[int]): year
|
||||||
|
month (int): month
|
||||||
|
day (int): day
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
date: Date result
|
||||||
|
"""
|
||||||
|
|
||||||
|
assert day, 'No day specified'
|
||||||
|
|
||||||
|
# Special case: month inherit
|
||||||
|
if month is None:
|
||||||
|
month = self.date_history[-1].month
|
||||||
|
|
||||||
|
# Special case: 12 month may mean previous year
|
||||||
|
if (year is None
|
||||||
|
and month == 12
|
||||||
|
and self.date_history
|
||||||
|
and max(self.date_history) < date(year=self.year, month=2, day=1)):
|
||||||
|
year = self.year - 1
|
||||||
|
|
||||||
|
year = year or self.year
|
||||||
|
return date(year=year, month=month, day=day)
|
||||||
|
|
||||||
|
|
||||||
class SentenceParser:
|
class SentenceParser:
|
||||||
"""Parser for holiday shift description sentence. """
|
"""Parser for holiday shift description sentence. """
|
||||||
|
|
||||||
def __init__(self, sentence, year):
|
def __init__(self, parent: DescriptionParser, sentence):
|
||||||
|
self.parent = parent
|
||||||
self.sentence = sentence
|
self.sentence = sentence
|
||||||
self.year = year
|
|
||||||
self._date_memory = set()
|
|
||||||
|
|
||||||
def extract_dates(self, text: str) -> Iterator[date]:
|
def extract_dates(self, text: str) -> Iterator[date]:
|
||||||
"""Extract date from text.
|
"""Extract date from text.
|
||||||
|
|
@ -105,77 +162,49 @@ class SentenceParser:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
count = 0
|
count = 0
|
||||||
|
text = text.replace('(', '(').replace(')', ')')
|
||||||
for method in self.date_extraction_methods:
|
for method in self.date_extraction_methods:
|
||||||
for i in method(self, text):
|
for i in method(self, text):
|
||||||
count += 1
|
count += 1
|
||||||
if i in self._date_memory:
|
is_seen = i in self.parent.date_history
|
||||||
|
self.parent.memorize_date(i)
|
||||||
|
if is_seen:
|
||||||
continue
|
continue
|
||||||
self._date_memory.add(i)
|
|
||||||
yield i
|
yield i
|
||||||
|
|
||||||
if not count:
|
if not count:
|
||||||
raise NotImplementedError(text)
|
raise NotImplementedError(text)
|
||||||
|
|
||||||
def get_date(self, year: Optional[int], month: int, day: int) -> date:
|
|
||||||
"""Get date in context.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
year (Optional[int]): year
|
|
||||||
month (int): month
|
|
||||||
day (int): day
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
date: Date result
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Special case: 12 month may mean previous year
|
|
||||||
if (year is None
|
|
||||||
and month == 12
|
|
||||||
and self._date_memory
|
|
||||||
and max(self._date_memory) < date(year=self.year, month=2, day=1)):
|
|
||||||
year = self.year - 1
|
|
||||||
|
|
||||||
year = year or self.year
|
|
||||||
return date(year=year, month=month, day=day)
|
|
||||||
|
|
||||||
def _extract_dates_1(self, value):
|
def _extract_dates_1(self, value):
|
||||||
match = re.match(r'(?:(\d+)年)?(?:(\d+)月)(\d+)日', value)
|
match = re.findall(r'(?:(\d+)年)?(?:(\d+)月)?(\d+)日', value)
|
||||||
if match:
|
for groups in match:
|
||||||
groups = [_cast_int(i) for i in match.groups()]
|
groups = [_cast_int(i) for i in groups]
|
||||||
assert len(groups) == 3, groups
|
assert len(groups) == 3, groups
|
||||||
yield self.get_date(year=groups[0], month=groups[1], day=groups[2])
|
yield self.parent.get_date(year=groups[0], month=groups[1], day=groups[2])
|
||||||
|
|
||||||
def _extract_dates_2(self, value):
|
def _extract_dates_2(self, value):
|
||||||
match = re.match(
|
match = re.findall(
|
||||||
r'(?:(\d+)年)?(?:(\d+)月)(\d+)日(?:至|-|—)(?:(\d+)年)?(?:(\d+)月)?(\d+)日', value)
|
r'(?:(\d+)年)?(?:(\d+)月)?(\d+)日(?:至|-|—)(?:(\d+)年)?(?:(\d+)月)?(\d+)日', value)
|
||||||
if match:
|
for groups in match:
|
||||||
groups = [_cast_int(i) for i in match.groups()]
|
groups = [_cast_int(i) for i in groups]
|
||||||
assert len(groups) == 6, groups
|
assert len(groups) == 6, groups
|
||||||
start = self.get_date(year=groups[0],
|
start = self.parent.get_date(year=groups[0],
|
||||||
month=groups[1], day=groups[2])
|
month=groups[1], day=groups[2])
|
||||||
end = self.get_date(year=groups[3],
|
end = self.parent.get_date(year=groups[3],
|
||||||
month=groups[4] or groups[1], day=groups[5])
|
month=groups[4], day=groups[5])
|
||||||
for i in range((end - start).days + 1):
|
for i in range((end - start).days + 1):
|
||||||
yield start + timedelta(days=i)
|
yield start + timedelta(days=i)
|
||||||
|
|
||||||
def _extract_dates_3(self, value):
|
def _extract_dates_3(self, value):
|
||||||
match = re.match(
|
match = re.findall(
|
||||||
r'(?:(\d+)年)?(?:(\d+)月)(\d+)日(?:([^)]+))?'
|
r'(?:(\d+)年)?(?:(\d+)月)?(\d+)日(?:([^)]+))?'
|
||||||
r'(?:、(?:(\d+)年)?(?:(\d+)月)?(\d+)日(?:([^)]+))?)+',
|
r'(?:、(?:(\d+)年)?(?:(\d+)月)?(\d+)日(?:([^)]+))?)+',
|
||||||
value.replace('(', '(').replace(')', ')'))
|
value)
|
||||||
if match:
|
for groups in match:
|
||||||
groups = [_cast_int(i) for i in match.groups()]
|
groups = [_cast_int(i) for i in groups]
|
||||||
assert not (len(groups) % 3), groups
|
assert not (len(groups) % 3), groups
|
||||||
year = self.year
|
|
||||||
month = None
|
|
||||||
day = None
|
|
||||||
for i in range(0, len(groups), 3):
|
for i in range(0, len(groups), 3):
|
||||||
year = groups[i]
|
yield self.parent.get_date(year=groups[i], month=groups[i+1], day=groups[i+2])
|
||||||
month = groups[i+1] or month
|
|
||||||
day = groups[i+2]
|
|
||||||
assert month
|
|
||||||
assert day
|
|
||||||
yield self.get_date(year=year, month=month, day=day)
|
|
||||||
|
|
||||||
date_extraction_methods = [
|
date_extraction_methods = [
|
||||||
_extract_dates_1,
|
_extract_dates_1,
|
||||||
|
|
@ -183,7 +212,7 @@ class SentenceParser:
|
||||||
_extract_dates_3
|
_extract_dates_3
|
||||||
]
|
]
|
||||||
|
|
||||||
def parse(self, memory: set) -> Iterator[dict]:
|
def parse(self) -> Iterator[dict]:
|
||||||
"""Parse days with memory
|
"""Parse days with memory
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
|
@ -193,7 +222,6 @@ class SentenceParser:
|
||||||
Iterator[dict]: Days without name field.
|
Iterator[dict]: Days without name field.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self._date_memory = memory
|
|
||||||
for method in self.parsing_methods:
|
for method in self.parsing_methods:
|
||||||
for i in method(self):
|
for i in method(self):
|
||||||
yield i
|
yield i
|
||||||
|
|
@ -217,7 +245,7 @@ class SentenceParser:
|
||||||
}
|
}
|
||||||
|
|
||||||
def _parse_shift_1(self):
|
def _parse_shift_1(self):
|
||||||
match = re.match('(.+)公休日调至(.+)', self.sentence)
|
match = re.match('(.+)调至(.+)', self.sentence)
|
||||||
if match:
|
if match:
|
||||||
for i in self.extract_dates(match.group(1)):
|
for i in self.extract_dates(match.group(1)):
|
||||||
yield {
|
yield {
|
||||||
|
|
@ -237,29 +265,6 @@ class SentenceParser:
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
class DescriptionParser:
|
|
||||||
"""Parser for holiday shift description. """
|
|
||||||
|
|
||||||
def __init__(self, description):
|
|
||||||
self.description = description
|
|
||||||
self._date_memory = set()
|
|
||||||
|
|
||||||
def parse(self, year: int) -> Iterator[dict]:
|
|
||||||
"""Generator for description parsing result.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
year (int): Context year
|
|
||||||
"""
|
|
||||||
|
|
||||||
self._date_memory.clear()
|
|
||||||
for i in re.split(',|。', self.description):
|
|
||||||
for j in SentenceParser(i, year).parse(self._date_memory):
|
|
||||||
yield j
|
|
||||||
|
|
||||||
if not self._date_memory:
|
|
||||||
raise NotImplementedError(self.description)
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_holiday(year: int):
|
def fetch_holiday(year: int):
|
||||||
"""Fetch holiday data. """
|
"""Fetch holiday data. """
|
||||||
|
|
||||||
|
|
@ -273,7 +278,7 @@ def fetch_holiday(year: int):
|
||||||
days.extend({
|
days.extend({
|
||||||
'name': name,
|
'name': name,
|
||||||
**j
|
**j
|
||||||
} for j in DescriptionParser(description).parse(year))
|
} for j in DescriptionParser(description, year).parse())
|
||||||
return {
|
return {
|
||||||
'year': year,
|
'year': year,
|
||||||
'papers': papers,
|
'papers': papers,
|
||||||
|
|
|
||||||
|
|
@ -17,8 +17,8 @@ def _generate_tests():
|
||||||
def create_test(case):
|
def create_test(case):
|
||||||
def _test():
|
def _test():
|
||||||
year, description, expected = case['year'], case['description'], case['expected']
|
year, description, expected = case['year'], case['description'], case['expected']
|
||||||
assert _normalize(DescriptionParser(description)
|
assert _normalize(DescriptionParser(
|
||||||
.parse(year)) == _normalize(expected), case
|
description, year).parse()) == _normalize(expected), case
|
||||||
return _test
|
return _test
|
||||||
|
|
||||||
for index, case in enumerate(cases, 1):
|
for index, case in enumerate(cases, 1):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user