Implement parsing (WIP)

This commit is contained in:
NateScarlet 2019-03-08 00:41:27 +08:00
parent 5f68f773a7
commit 68e3d12722
No known key found for this signature in database
GPG Key ID: 5C242793B070309C
2 changed files with 19 additions and 14 deletions

View File

@ -6,7 +6,7 @@
{ "date": "2018-12-30", "isOffDay": true }, { "date": "2018-12-30", "isOffDay": true },
{ "date": "2018-12-31", "isOffDay": true }, { "date": "2018-12-31", "isOffDay": true },
{ "date": "2019-01-01", "isOffDay": true }, { "date": "2019-01-01", "isOffDay": true },
{ "date": "2019-12-29", "isOffDay": false } { "date": "2018-12-29", "isOffDay": false }
] ]
}, },
{ {
@ -19,6 +19,7 @@
{ "date": "2019-02-07", "isOffDay": true }, { "date": "2019-02-07", "isOffDay": true },
{ "date": "2019-02-08", "isOffDay": true }, { "date": "2019-02-08", "isOffDay": true },
{ "date": "2019-02-09", "isOffDay": true }, { "date": "2019-02-09", "isOffDay": true },
{ "date": "2019-02-10", "isOffDay": true },
{ "date": "2019-02-02", "isOffDay": false }, { "date": "2019-02-02", "isOffDay": false },
{ "date": "2019-02-03", "isOffDay": false } { "date": "2019-02-03", "isOffDay": false }
] ]
@ -213,7 +214,7 @@
{ "date": "2007-12-30", "isOffDay": true }, { "date": "2007-12-30", "isOffDay": true },
{ "date": "2007-12-31", "isOffDay": true }, { "date": "2007-12-31", "isOffDay": true },
{ "date": "2008-01-01", "isOffDay": true }, { "date": "2008-01-01", "isOffDay": true },
{ "date": "2019-12-29", "isOffDay": false } { "date": "2007-12-29", "isOffDay": false }
] ]
}, },
{ {

View File

@ -5,7 +5,7 @@ import argparse
import json import json
import re import re
from datetime import date, timedelta from datetime import date, timedelta
from typing import List from typing import List, Optional
import bs4 import bs4
import requests import requests
@ -76,13 +76,18 @@ class SentenceParser:
memory.add(i) memory.add(i)
yield i yield i
def get_date(self, year: Optional[int], month: int, day: int) -> date:
if year is None and month > 10:
year = self.year - 1
year = year or self.year
return date(year=year, month=month, day=day)
def _extract_dates_1(self, value): def _extract_dates_1(self, value):
match = re.match(r'(?:(\d+)年)?(?:(\d+)月)(\d+)日', value) match = re.match(r'(?:(\d+)年)?(?:(\d+)月)(\d+)日', value)
if match: if match:
groups = [_cast_int(i) for i in match.groups()] groups = [_cast_int(i) for i in match.groups()]
assert len(groups) == 3, groups assert len(groups) == 3, groups
yield date(year=groups[0] or self.year, yield self.get_date(year=groups[0], month=groups[1], day=groups[2])
month=groups[1], day=groups[2])
def _extract_dates_2(self, value): def _extract_dates_2(self, value):
match = re.match( match = re.match(
@ -90,10 +95,10 @@ class SentenceParser:
if match: if match:
groups = [_cast_int(i) for i in match.groups()] groups = [_cast_int(i) for i in match.groups()]
assert len(groups) == 6, groups assert len(groups) == 6, groups
start = date(year=groups[0] or self.year, start = self.get_date(year=groups[0],
month=groups[1], day=groups[2]) month=groups[1], day=groups[2])
end = date(year=groups[3] or self.year, end = self.get_date(year=groups[3],
month=groups[4] or groups[1], day=groups[5]) month=groups[4] or groups[1], day=groups[5])
for i in range((end - start).days + 1): for i in range((end - start).days + 1):
yield start + timedelta(days=i) yield start + timedelta(days=i)
@ -107,13 +112,12 @@ class SentenceParser:
month = None month = None
day = None day = None
for i in range(0, len(groups), 3): for i in range(0, len(groups), 3):
year = groups[i] or year year = groups[i]
month = groups[i+1] or month month = groups[i+1] or month
day = groups[i+2] day = groups[i+2]
assert year
assert month assert month
assert day assert day
yield date(year=year, month=month, day=day) yield self.get_date(year=year, month=month, day=day)
date_extraction_methods = [ date_extraction_methods = [
_extract_dates_1, _extract_dates_1,
@ -149,7 +153,7 @@ class SentenceParser:
'isOffDay': False 'isOffDay': False
} }
def _parse_work_2(self): def _parse_shift_1(self):
match = re.match('(.+)公休日调至(.+)', self.sentence) match = re.match('(.+)公休日调至(.+)', self.sentence)
if match: if match:
for i in self.extract_dates(match.group(1)): for i in self.extract_dates(match.group(1)):
@ -166,7 +170,7 @@ class SentenceParser:
parsing_methods = [ parsing_methods = [
_parse_rest_1, _parse_rest_1,
_parse_work_1, _parse_work_1,
_parse_work_2, _parse_shift_1,
] ]