Prepare test data

This commit is contained in:
NateScarlet 2019-03-06 21:41:31 +08:00
parent 748decd373
commit 4bcf0aed87
No known key found for this signature in database
GPG Key ID: 5C242793B070309C
4 changed files with 390 additions and 0 deletions

View File

@ -0,0 +1,295 @@
[
{
"year": 2019,
"description": "2018年12月30日至2019年1月1日放假调休共3天。2018年12月29日星期六上班。",
"expected": [
{ "date": "2018-12-30", "isOffDay": true },
{ "date": "2018-12-31", "isOffDay": true },
{ "date": "2019-01-01", "isOffDay": true },
{ "date": "2018-12-29", "isOffDay": false }
]
},
{
"year": 2019,
"description": "2月4日至10日放假调休共7天。2月2日星期六、2月3日星期日上班。",
"expected": [
{ "date": "2019-02-04", "isOffDay": true },
{ "date": "2019-02-05", "isOffDay": true },
{ "date": "2019-02-06", "isOffDay": true },
{ "date": "2019-02-07", "isOffDay": true },
{ "date": "2019-02-08", "isOffDay": true },
{ "date": "2019-02-09", "isOffDay": true },
{ "date": "2019-02-10", "isOffDay": true },
{ "date": "2019-02-02", "isOffDay": false },
{ "date": "2019-02-03", "isOffDay": false }
]
},
{
"year": 2019,
"description": "4月5日放假与周末连休。",
"expected": [{ "date": "2019-04-05", "isOffDay": true }]
},
{
"year": 2019,
"description": "5月1日放假。",
"expected": [{ "date": "2019-05-01", "isOffDay": true }]
},
{
"year": 2019,
"description": "6月7日放假与周末连休。",
"expected": [{ "date": "2019-06-07", "isOffDay": true }]
},
{
"year": 2019,
"description": "9月13日放假与周末连休。",
"expected": [{ "date": "2019-09-13", "isOffDay": true }]
},
{
"year": 2019,
"description": "10月1日至7日放假调休共7天。9月29日星期日、10月12日星期六上班。",
"expected": [
{ "date": "2019-10-01", "isOffDay": true },
{ "date": "2019-10-02", "isOffDay": true },
{ "date": "2019-10-03", "isOffDay": true },
{ "date": "2019-10-04", "isOffDay": true },
{ "date": "2019-10-05", "isOffDay": true },
{ "date": "2019-10-06", "isOffDay": true },
{ "date": "2019-10-07", "isOffDay": true },
{ "date": "2019-09-29", "isOffDay": false },
{ "date": "2019-10-12", "isOffDay": false }
]
},
{
"year": 2018,
"description": "1月1日放假与周末连休。",
"expected": [{ "date": "2018-01-01", "isOffDay": true }]
},
{
"year": 2018,
"description": "2月15日至21日放假调休共7天。2月11日星期日、2月24日星期六上班。",
"expected": [
{ "date": "2018-02-15", "isOffDay": true },
{ "date": "2018-02-16", "isOffDay": true },
{ "date": "2018-02-17", "isOffDay": true },
{ "date": "2018-02-18", "isOffDay": true },
{ "date": "2018-02-19", "isOffDay": true },
{ "date": "2018-02-20", "isOffDay": true },
{ "date": "2018-02-21", "isOffDay": true },
{ "date": "2018-02-11", "isOffDay": false },
{ "date": "2018-02-24", "isOffDay": false }
]
},
{
"year": 2018,
"description": "4月5日至7日放假调休共3天。4月8日星期日上班。",
"expected": [
{ "date": "2018-04-05", "isOffDay": true },
{ "date": "2018-04-06", "isOffDay": true },
{ "date": "2018-04-07", "isOffDay": true },
{ "date": "2018-04-08", "isOffDay": false }
]
},
{
"year": 2018,
"description": "4月29日至5月1日放假调休共3天。4月28日星期六上班。",
"expected": [
{ "date": "2018-04-29", "isOffDay": true },
{ "date": "2018-04-30", "isOffDay": true },
{ "date": "2018-05-01", "isOffDay": true },
{ "date": "2018-04-28", "isOffDay": false }
]
},
{
"year": 2018,
"description": "6月18日放假与周末连休。",
"expected": [{ "date": "2018-06-18", "isOffDay": true }]
},
{
"year": 2018,
"description": "9月24日放假与周末连休。",
"expected": [{ "date": "2018-09-24", "isOffDay": true }]
},
{
"year": 2018,
"description": "10月1日至7日放假调休共7天。9月29日星期六、9月30日星期日上班。",
"expected": [
{ "date": "2018-10-01", "isOffDay": true },
{ "date": "2018-10-02", "isOffDay": true },
{ "date": "2018-10-03", "isOffDay": true },
{ "date": "2018-10-04", "isOffDay": true },
{ "date": "2018-10-05", "isOffDay": true },
{ "date": "2018-10-06", "isOffDay": true },
{ "date": "2018-10-07", "isOffDay": true },
{ "date": "2018-09-29", "isOffDay": false },
{ "date": "2018-09-30", "isOffDay": false }
]
},
{
"year": 2013,
"description": "1月1日至3日放假调休共3天。1月5日(星期六)、1月6日(星期日)上班。",
"expected": [
{ "date": "2013-01-01", "isOffDay": true },
{ "date": "2013-01-02", "isOffDay": true },
{ "date": "2013-01-03", "isOffDay": true },
{ "date": "2013-01-05", "isOffDay": false },
{ "date": "2013-01-06", "isOffDay": false }
]
},
{
"year": 2013,
"description": "2月9日至15日放假调休共7天。2月16日(星期六)、2月17日(星期日)上班。",
"expected": [
{ "date": "2013-02-09", "isOffDay": true },
{ "date": "2013-02-10", "isOffDay": true },
{ "date": "2013-02-11", "isOffDay": true },
{ "date": "2013-02-12", "isOffDay": true },
{ "date": "2013-02-13", "isOffDay": true },
{ "date": "2013-02-14", "isOffDay": true },
{ "date": "2013-02-15", "isOffDay": true },
{ "date": "2013-02-16", "isOffDay": false },
{ "date": "2013-02-17", "isOffDay": false }
]
},
{
"year": 2013,
"description": "4月4日至6日放假调休共3天。4月7日(星期日)上班。",
"expected": [
{ "date": "2013-04-04", "isOffDay": true },
{ "date": "2013-04-05", "isOffDay": true },
{ "date": "2013-04-06", "isOffDay": true },
{ "date": "2013-04-07", "isOffDay": false }
]
},
{
"year": 2013,
"description": "4月29日至5月1日放假调休共3天。4月27日(星期六)、4月28日(星期日)上班。",
"expected": [
{ "date": "2013-04-29", "isOffDay": true },
{ "date": "2013-04-30", "isOffDay": true },
{ "date": "2013-05-01", "isOffDay": true },
{ "date": "2013-04-27", "isOffDay": false },
{ "date": "2013-04-28", "isOffDay": false }
]
},
{
"year": 2013,
"description": "6月10日至12日放假调休共3天。6月8日(星期六)、6月9日(星期日)上班。",
"expected": [
{ "date": "2013-06-10", "isOffDay": true },
{ "date": "2013-06-11", "isOffDay": true },
{ "date": "2013-06-12", "isOffDay": true },
{ "date": "2013-06-08", "isOffDay": false },
{ "date": "2013-06-09", "isOffDay": false }
]
},
{
"year": 2013,
"description": "9月19日至21日放假调休共3天。9月22日(星期日)上班。",
"expected": [
{ "date": "2013-09-19", "isOffDay": true },
{ "date": "2013-09-20", "isOffDay": true },
{ "date": "2013-09-21", "isOffDay": true },
{ "date": "2013-09-22", "isOffDay": false }
]
},
{
"year": 2013,
"description": "10月1日至7日放假调休共7天。9月29日(星期日)、10月12日(星期六)上班。",
"expected": [
{ "date": "2013-10-01", "isOffDay": true },
{ "date": "2013-10-02", "isOffDay": true },
{ "date": "2013-10-03", "isOffDay": true },
{ "date": "2013-10-04", "isOffDay": true },
{ "date": "2013-10-05", "isOffDay": true },
{ "date": "2013-10-06", "isOffDay": true },
{ "date": "2013-10-07", "isOffDay": true },
{ "date": "2013-09-29", "isOffDay": false },
{ "date": "2013-10-12", "isOffDay": false }
]
},
{
"year": 2008,
"description": "2007年12月30日—2008年1月1日放假共3天。其中1月1日星期二为法定节假日12月30日星期日为公休日12月29日星期六公休日调至12月31日星期一12月29日星期六上班。",
"expected": [
{ "date": "2007-12-30", "isOffDay": true },
{ "date": "2007-12-31", "isOffDay": true },
{ "date": "2008-01-01", "isOffDay": true },
{ "date": "2007-12-29", "isOffDay": false }
]
},
{
"year": 2008,
"description": "2月6日—12日农历除夕至正月初六放假共7天。其中2月6日除夕、2月7日春节、2月8日正月初二为法定节假日2月9日星期六、2月10日星期日照常公休2月2日星期六、2月3日星期日两个公休日调至2月11日星期一、2月12日星期二2月2日星期六、2月3日星期日上班。",
"expected": [
{ "date": "2008-02-06", "isOffDay": true },
{ "date": "2008-02-07", "isOffDay": true },
{ "date": "2008-02-08", "isOffDay": true },
{ "date": "2008-02-09", "isOffDay": true },
{ "date": "2008-02-10", "isOffDay": true },
{ "date": "2008-02-11", "isOffDay": true },
{ "date": "2008-02-12", "isOffDay": true },
{ "date": "2008-02-02", "isOffDay": false },
{ "date": "2008-02-03", "isOffDay": false }
]
},
{
"year": 2008,
"description": "4月4日—6日放假共3天。其中4月4日清明节为法定节假日4月5日星期六、4月6日星期日照常公休。",
"expected": [
{ "date": "2008-04-04", "isOffDay": true },
{ "date": "2008-04-05", "isOffDay": true },
{ "date": "2008-04-06", "isOffDay": true }
]
},
{
"year": 2008,
"description": "5月1日—3日放假共3天。其中5月1日为法定节假日5月3日星期六为公休日5月4日星期日公休日调至5月2日星期五5月4日星期日上班。",
"expected": [
{ "date": "2008-05-01", "isOffDay": true },
{ "date": "2008-05-02", "isOffDay": true },
{ "date": "2008-05-03", "isOffDay": true },
{ "date": "2008-05-04", "isOffDay": false }
]
},
{
"year": 2008,
"description": "6月7日—9日放假共3天。其中6月7日星期六照常公休6月8日农历五月初五端午节为法定节假日6月8日星期日公休日调至6月9日星期一。",
"expected": [
{ "date": "2008-06-07", "isOffDay": true },
{ "date": "2008-06-08", "isOffDay": true },
{ "date": "2008-06-09", "isOffDay": true }
]
},
{
"year": 2008,
"description": "9月13日—15日放假共3天。其中9月13日星期六为公休日9月14日农历八月十五中秋节为法定节假日9月14日星期日公休日调至9月15日星期一。",
"expected": [
{ "date": "2008-09-13", "isOffDay": true },
{ "date": "2008-09-14", "isOffDay": true },
{ "date": "2008-09-15", "isOffDay": true }
]
},
{
"year": 2008,
"description": "9月13日—15日放假共3天。其中9月13日星期六为公休日9月14日农历八月十五中秋节为法定节假日9月14日星期日公休日调至9月15日星期一。",
"expected": [
{ "date": "2008-09-13", "isOffDay": true },
{ "date": "2008-09-14", "isOffDay": true },
{ "date": "2008-09-15", "isOffDay": true }
]
},
{
"year": 2008,
"description": "9月29日—10月5日放假共7天。其中10月1日、2日、3日为法定节假日9月27日星期六、9月28日星期日两个公休日调至9月29日星期一、30日星期二10月4日星期六、5日星期日照常公休。",
"expected": [
{ "date": "2008-09-29", "isOffDay": true },
{ "date": "2008-09-30", "isOffDay": true },
{ "date": "2008-10-01", "isOffDay": true },
{ "date": "2008-10-02", "isOffDay": true },
{ "date": "2008-10-03", "isOffDay": true },
{ "date": "2008-10-04", "isOffDay": true },
{ "date": "2008-10-05", "isOffDay": true },
{ "date": "2008-09-27", "isOffDay": false },
{ "date": "2008-09-28", "isOffDay": false }
]
}
]

69
fetch_holidays.py Normal file
View File

@ -0,0 +1,69 @@
#!/usr/bin/env python3
"""Fetch holidays from gov.cn """
import argparse
import re
import bs4
import requests
# Search page on sousuo.gov.cn; ``{year}`` is filled in with ``str.format``.
# The percent-encoded query values are UTF-8 Chinese text:
#   title       - the year followed by the term for "holidays"
#   puborg      - the publishing organ (General Office of the State Council)
#   pcodeJiguan - the document-number prefix used to narrow the search
SEARCH_URL = 'http://sousuo.gov.cn/s.htm?' + '&'.join((
    't=paper',
    'advance=true',
    'sort=',
    'title={year}+%E8%8A%82%E5%81%87%E6%97%A5',
    'puborg=%E5%9B%BD%E5%8A%A1%E9%99%A2%E5%8A%9E%E5%85%AC%E5%8E%85',
    'pcodeJiguan=%E5%9B%BD%E5%8A%9E%E5%8F%91%E6%98%8E%E7%94%B5',
))
def get_paper_urls(year):
    """Find the URLs of the holiday papers listed for ``year``.

    Downloads the search result page built from ``SEARCH_URL`` and extracts
    the first link of every ``<li class="res-list">`` item.

    Args:
        year: Value substituted for ``{year}`` in ``SEARCH_URL``.

    Returns:
        list: The extracted ``href`` values, in page order.

    Raises:
        AssertionError: If any extracted link does not start with the
            expected ``http://www.gov.cn/zhengce/content/...`` paper URL
            shape, which means the site layout needs a human look.
    """
    url = SEARCH_URL.format(year=year)
    body = requests.get(url).text
    ret = re.findall(
        r'<li class="res-list".*?<a href="(.+?)".*?</li>', body, flags=re.S)

    # Dots are escaped so that only the literal host name and the literal
    # ``.htm`` extension are accepted.
    paper_url = re.compile(
        r'http://www\.gov\.cn/zhengce/content/'
        r'\d{4}-\d{2}/\d{2}/content_\d+\.htm')
    # Raised explicitly (instead of ``assert``) so the check is not stripped
    # when Python runs with -O; the exception type stays AssertionError.
    if not all(paper_url.match(i) for i in ret):
        raise AssertionError('Site changed, need human verify')
    return ret
def get_paper(url):
    """Download a paper page and return the text of its content cell.

    Args:
        url: Address of the paper page.

    Returns:
        str: Text content of the ``<td class="b12c">`` element.

    Raises:
        AssertionError: If the page has no ``td.b12c`` element or that
            element contains no text.
    """
    response = requests.get(url)
    # Decode the body as UTF-8 regardless of the encoding requests guessed.
    response.encoding = 'utf-8'
    soup = bs4.BeautifulSoup(response.text, features='html.parser')
    container = soup.find('td', class_='b12c')
    # Explicit raises (instead of ``assert``) keep these checks active under
    # ``python -O``; otherwise a missing container would surface as an
    # AttributeError on ``None`` below.
    if not container:
        raise AssertionError(
            f'Can not get paper container from url: {url}')
    ret = container.get_text()
    if not ret:
        raise AssertionError(f'Can not get paper context from url: {url}')
    return ret
def get_rules(paper: str):
    """Yield the numbered holiday rules contained in a paper's text.

    A rule line starts with a single Chinese numeral followed by the
    enumeration comma, then a name, a colon (full-width or ASCII) and the
    description.  Repeated lines are reported once, in first-seen order.

    Args:
        paper: Full text of the paper, one rule per line.

    Yields:
        tuple: ``(name, description)`` for every matching line.
    """
    # dict.fromkeys drops duplicate lines while keeping first-seen order
    # (insertion order is preserved by dicts), without the quadratic
    # ``list.index`` lookups.
    for line in dict.fromkeys(paper.splitlines()):
        # The colon separates the holiday name from its description; without
        # a delimiter the lazy first group would always capture one character.
        match = re.match(r'[一二三四五六七八九十]、(.+?)[::](.+)', line)
        if match:
            yield match.groups()
def parse_holiday_description(year, description):
    """Placeholder for parsing one holiday description.

    Not implemented yet: both arguments are ignored and ``None`` is
    returned.
    """
    return None
def parse_paper(url):
    """Placeholder for parsing a whole paper.

    Not implemented yet: ``url`` is ignored and ``None`` is returned.
    """
    return None
def main():
    """Print every holiday rule found in the papers for a given year.

    Reads the positional ``year`` argument from the command line, looks up
    the matching paper URLs, downloads each paper and prints the rules
    extracted from it, one per line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('year')
    args = parser.parse_args()

    # Plain loops instead of a list comprehension: printing is a side effect
    # and no list of results is needed.
    for url in get_paper_urls(args.year):
        for rule in get_rules(get_paper(url)):
            print(rule)


if __name__ == '__main__':
    main()

2
requirements.txt Normal file
View File

@ -0,0 +1,2 @@
requests ~= 2.21.0
beautifulsoup4 ~= 4.7.1

24
test_fetch_holidays.py Normal file
View File

@ -0,0 +1,24 @@
import json
import sys
from fetch_holidays import parse_holiday_description
def _generate_tests():
    """Create one module-level test function per JSON case.

    Loads ``description_parsing_cases.json`` and, for each case, attaches a
    function named ``test_parse_holiday_description_<n>`` (numbered from 1)
    to this module.  Each function asserts that
    ``parse_holiday_description`` returns the case's ``expected`` value.
    """
    this_module = sys.modules[__name__]

    def create_test(case):
        # A factory is used so each test closes over its own case.
        def _test():
            year = case['year']
            description = case['description']
            expected = case['expected']
            actual = parse_holiday_description(year, description)
            assert actual == expected, case
        return _test

    with open('description_parsing_cases.json', 'r', encoding='utf-8') as f:
        cases = json.load(f)

    for number, case in enumerate(cases, 1):
        test_name = f'test_parse_holiday_description_{number}'
        setattr(this_module, test_name, create_test(case))


_generate_tests()