Implement parsing (WIP)
This commit is contained in:
parent
7a2d9af66e
commit
5f68f773a7
|
|
@@ -86,7 +86,7 @@ class SentenceParser:
|
||||||
|
|
||||||
def _extract_dates_2(self, value):
|
def _extract_dates_2(self, value):
|
||||||
match = re.match(
|
match = re.match(
|
||||||
r'(?:(\d+)年)?(?:(\d+)月)(\d+)日(?:至|-)(?:(\d+)年)?(?:(\d+)月)?(\d+)日', value)
|
r'(?:(\d+)年)?(?:(\d+)月)(\d+)日(?:至|-|—)(?:(\d+)年)?(?:(\d+)月)?(\d+)日', value)
|
||||||
if match:
|
if match:
|
||||||
groups = [_cast_int(i) for i in match.groups()]
|
groups = [_cast_int(i) for i in match.groups()]
|
||||||
assert len(groups) == 6, groups
|
assert len(groups) == 6, groups
|
||||||
|
|
@@ -171,8 +171,12 @@ class SentenceParser:
|
||||||
|
|
||||||
|
|
||||||
def parse_holiday_description(description: str, year: int):
|
def parse_holiday_description(description: str, year: int):
|
||||||
|
date_memory = set()
|
||||||
for i in re.split(',|。', description):
|
for i in re.split(',|。', description):
|
||||||
for j in SentenceParser(i, year).parse():
|
for j in SentenceParser(i, year).parse():
|
||||||
|
if j['date'] in date_memory:
|
||||||
|
continue
|
||||||
|
date_memory.add(j['date'])
|
||||||
yield j
|
yield j
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user