Implement parsing (WIP)
This commit is contained in:
parent
5f68f773a7
commit
68e3d12722
|
|
@ -6,7 +6,7 @@
|
||||||
{ "date": "2018-12-30", "isOffDay": true },
|
{ "date": "2018-12-30", "isOffDay": true },
|
||||||
{ "date": "2018-12-31", "isOffDay": true },
|
{ "date": "2018-12-31", "isOffDay": true },
|
||||||
{ "date": "2019-01-01", "isOffDay": true },
|
{ "date": "2019-01-01", "isOffDay": true },
|
||||||
{ "date": "2019-12-29", "isOffDay": false }
|
{ "date": "2018-12-29", "isOffDay": false }
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -19,6 +19,7 @@
|
||||||
{ "date": "2019-02-07", "isOffDay": true },
|
{ "date": "2019-02-07", "isOffDay": true },
|
||||||
{ "date": "2019-02-08", "isOffDay": true },
|
{ "date": "2019-02-08", "isOffDay": true },
|
||||||
{ "date": "2019-02-09", "isOffDay": true },
|
{ "date": "2019-02-09", "isOffDay": true },
|
||||||
|
{ "date": "2019-02-10", "isOffDay": true },
|
||||||
{ "date": "2019-02-02", "isOffDay": false },
|
{ "date": "2019-02-02", "isOffDay": false },
|
||||||
{ "date": "2019-02-03", "isOffDay": false }
|
{ "date": "2019-02-03", "isOffDay": false }
|
||||||
]
|
]
|
||||||
|
|
@ -213,7 +214,7 @@
|
||||||
{ "date": "2007-12-30", "isOffDay": true },
|
{ "date": "2007-12-30", "isOffDay": true },
|
||||||
{ "date": "2007-12-31", "isOffDay": true },
|
{ "date": "2007-12-31", "isOffDay": true },
|
||||||
{ "date": "2008-01-01", "isOffDay": true },
|
{ "date": "2008-01-01", "isOffDay": true },
|
||||||
{ "date": "2019-12-29", "isOffDay": false }
|
{ "date": "2007-12-29", "isOffDay": false }
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ import argparse
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from datetime import date, timedelta
|
from datetime import date, timedelta
|
||||||
from typing import List
|
from typing import List, Optional
|
||||||
|
|
||||||
import bs4
|
import bs4
|
||||||
import requests
|
import requests
|
||||||
|
|
@ -76,13 +76,18 @@ class SentenceParser:
|
||||||
memory.add(i)
|
memory.add(i)
|
||||||
yield i
|
yield i
|
||||||
|
|
||||||
|
def get_date(self, year: Optional[int], month: int, day: int) -> date:
    """Build a ``date``, filling in a missing year from ``self.year``.

    Args:
        year: Explicit year, or ``None`` when no year was given.
        month: Month number passed through to ``date``.
        day: Day of month passed through to ``date``.

    Returns:
        The ``datetime.date`` for the resolved year, month and day.

    Raises:
        ValueError: Propagated from ``date`` when the resolved fields do
            not form a valid calendar date.
    """
    # A year-less date in November or December is assigned to the year
    # before self.year.
    # NOTE(review): presumably because a holiday notice for year N can
    # list days at the end of year N-1 without spelling out the year --
    # confirm against the source notices.
    if year is None and month > 10:
        year = self.year - 1

    # Any year that is still falsy (None, or 0) falls back to self.year.
    year = year or self.year
    return date(year=year, month=month, day=day)
|
||||||
|
|
||||||
def _extract_dates_1(self, value):
|
def _extract_dates_1(self, value):
|
||||||
match = re.match(r'(?:(\d+)年)?(?:(\d+)月)(\d+)日', value)
|
match = re.match(r'(?:(\d+)年)?(?:(\d+)月)(\d+)日', value)
|
||||||
if match:
|
if match:
|
||||||
groups = [_cast_int(i) for i in match.groups()]
|
groups = [_cast_int(i) for i in match.groups()]
|
||||||
assert len(groups) == 3, groups
|
assert len(groups) == 3, groups
|
||||||
yield date(year=groups[0] or self.year,
|
yield self.get_date(year=groups[0], month=groups[1], day=groups[2])
|
||||||
month=groups[1], day=groups[2])
|
|
||||||
|
|
||||||
def _extract_dates_2(self, value):
|
def _extract_dates_2(self, value):
|
||||||
match = re.match(
|
match = re.match(
|
||||||
|
|
@ -90,9 +95,9 @@ class SentenceParser:
|
||||||
if match:
|
if match:
|
||||||
groups = [_cast_int(i) for i in match.groups()]
|
groups = [_cast_int(i) for i in match.groups()]
|
||||||
assert len(groups) == 6, groups
|
assert len(groups) == 6, groups
|
||||||
start = date(year=groups[0] or self.year,
|
start = self.get_date(year=groups[0],
|
||||||
month=groups[1], day=groups[2])
|
month=groups[1], day=groups[2])
|
||||||
end = date(year=groups[3] or self.year,
|
end = self.get_date(year=groups[3],
|
||||||
month=groups[4] or groups[1], day=groups[5])
|
month=groups[4] or groups[1], day=groups[5])
|
||||||
for i in range((end - start).days + 1):
|
for i in range((end - start).days + 1):
|
||||||
yield start + timedelta(days=i)
|
yield start + timedelta(days=i)
|
||||||
|
|
@ -107,13 +112,12 @@ class SentenceParser:
|
||||||
month = None
|
month = None
|
||||||
day = None
|
day = None
|
||||||
for i in range(0, len(groups), 3):
|
for i in range(0, len(groups), 3):
|
||||||
year = groups[i] or year
|
year = groups[i]
|
||||||
month = groups[i+1] or month
|
month = groups[i+1] or month
|
||||||
day = groups[i+2]
|
day = groups[i+2]
|
||||||
assert year
|
|
||||||
assert month
|
assert month
|
||||||
assert day
|
assert day
|
||||||
yield date(year=year, month=month, day=day)
|
yield self.get_date(year=year, month=month, day=day)
|
||||||
|
|
||||||
date_extraction_methods = [
|
date_extraction_methods = [
|
||||||
_extract_dates_1,
|
_extract_dates_1,
|
||||||
|
|
@ -149,7 +153,7 @@ class SentenceParser:
|
||||||
'isOffDay': False
|
'isOffDay': False
|
||||||
}
|
}
|
||||||
|
|
||||||
def _parse_work_2(self):
|
def _parse_shift_1(self):
|
||||||
match = re.match('(.+)公休日调至(.+)', self.sentence)
|
match = re.match('(.+)公休日调至(.+)', self.sentence)
|
||||||
if match:
|
if match:
|
||||||
for i in self.extract_dates(match.group(1)):
|
for i in self.extract_dates(match.group(1)):
|
||||||
|
|
@ -166,7 +170,7 @@ class SentenceParser:
|
||||||
parsing_methods = [
|
parsing_methods = [
|
||||||
_parse_rest_1,
|
_parse_rest_1,
|
||||||
_parse_work_1,
|
_parse_work_1,
|
||||||
_parse_work_2,
|
_parse_shift_1,
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user