Implement parsing (WIP)

This commit is contained in:
NateScarlet 2019-03-08 00:41:27 +08:00
parent 5f68f773a7
commit 68e3d12722
No known key found for this signature in database
GPG Key ID: 5C242793B070309C
2 changed files with 19 additions and 14 deletions

View File

@ -6,7 +6,7 @@
{ "date": "2018-12-30", "isOffDay": true },
{ "date": "2018-12-31", "isOffDay": true },
{ "date": "2019-01-01", "isOffDay": true },
{ "date": "2019-12-29", "isOffDay": false }
{ "date": "2018-12-29", "isOffDay": false }
]
},
{
@ -19,6 +19,7 @@
{ "date": "2019-02-07", "isOffDay": true },
{ "date": "2019-02-08", "isOffDay": true },
{ "date": "2019-02-09", "isOffDay": true },
{ "date": "2019-02-10", "isOffDay": true },
{ "date": "2019-02-02", "isOffDay": false },
{ "date": "2019-02-03", "isOffDay": false }
]
@ -213,7 +214,7 @@
{ "date": "2007-12-30", "isOffDay": true },
{ "date": "2007-12-31", "isOffDay": true },
{ "date": "2008-01-01", "isOffDay": true },
{ "date": "2019-12-29", "isOffDay": false }
{ "date": "2007-12-29", "isOffDay": false }
]
},
{

View File

@ -5,7 +5,7 @@ import argparse
import json
import re
from datetime import date, timedelta
from typing import List
from typing import List, Optional
import bs4
import requests
@ -76,13 +76,18 @@ class SentenceParser:
memory.add(i)
yield i
def get_date(self, year: Optional[int], month: int, day: int) -> date:
    """Build a ``date``, filling in a missing year from ``self.year``.

    When ``year`` is None and ``month`` is greater than 10, the year
    before ``self.year`` is tried first. Any falsy result (None or 0)
    falls back to ``self.year`` itself.
    """
    use_previous_year = year is None and month > 10
    candidate = self.year - 1 if use_previous_year else year
    return date(year=candidate or self.year, month=month, day=day)
def _extract_dates_1(self, value):
match = re.match(r'(?:(\d+)年)?(?:(\d+)月)(\d+)日', value)
if match:
groups = [_cast_int(i) for i in match.groups()]
assert len(groups) == 3, groups
yield date(year=groups[0] or self.year,
month=groups[1], day=groups[2])
yield self.get_date(year=groups[0], month=groups[1], day=groups[2])
def _extract_dates_2(self, value):
match = re.match(
@ -90,10 +95,10 @@ class SentenceParser:
if match:
groups = [_cast_int(i) for i in match.groups()]
assert len(groups) == 6, groups
start = date(year=groups[0] or self.year,
month=groups[1], day=groups[2])
end = date(year=groups[3] or self.year,
month=groups[4] or groups[1], day=groups[5])
start = self.get_date(year=groups[0],
month=groups[1], day=groups[2])
end = self.get_date(year=groups[3],
month=groups[4] or groups[1], day=groups[5])
for i in range((end - start).days + 1):
yield start + timedelta(days=i)
@ -107,13 +112,12 @@ class SentenceParser:
month = None
day = None
for i in range(0, len(groups), 3):
year = groups[i] or year
year = groups[i]
month = groups[i+1] or month
day = groups[i+2]
assert year
assert month
assert day
yield date(year=year, month=month, day=day)
yield self.get_date(year=year, month=month, day=day)
date_extraction_methods = [
_extract_dates_1,
@ -149,7 +153,7 @@ class SentenceParser:
'isOffDay': False
}
def _parse_work_2(self):
def _parse_shift_1(self):
match = re.match('(.+)公休日调至(.+)', self.sentence)
if match:
for i in self.extract_dates(match.group(1)):
@ -166,7 +170,7 @@ class SentenceParser:
parsing_methods = [
_parse_rest_1,
_parse_work_1,
_parse_work_2,
_parse_shift_1,
]