- Add scripts/fetch_hk.py to fetch HK holidays from 1823.gov.hk
- Make generate_ics() cal_name/cal_desc configurable
- Refactor update.py with REGIONS config; support --region cn|hk flag
- Generate hk/{year}.json and hk/{year}.ics under hk/ subdirectory
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
240 lines
6.8 KiB
Python
#!/usr/bin/env python3
|
|
"""Script for updating data. """
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import subprocess
|
|
from datetime import datetime, timedelta, tzinfo
|
|
from tempfile import mkstemp
|
|
from typing import Iterator
|
|
from zipfile import ZipFile
|
|
|
|
from tqdm import tqdm
|
|
|
|
from fetch import CustomJSONEncoder, fetch_holiday
|
|
from fetch_hk import HK_START_YEAR, fetch_hk_holiday
|
|
from generate_ics import generate_ics
|
|
from filetools import workspace_path
|
|
|
|
# JSON Schema that every per-year data file declares via its "$schema" key.
SCHEMA_URL = "https://raw.githubusercontent.com/NateScarlet/holiday-cn/master/schema.json"
# Base URL used to build each data file's "$id" value.
GITHUB_RAW_BASE = "https://raw.githubusercontent.com/NateScarlet/holiday-cn/master"

# Per-region configuration consumed by update_data()/update_main_ics():
#   fetch:         callable(year) -> data mapping containing a "days" list
#   start_year:    first year for which data is available
#   subdir:        repository subdirectory for data files (None = repo root)
#   main_ics_name: filename of the combined multi-year ICS calendar
#   cal_name / cal_desc: calendar metadata forwarded to generate_ics()
REGIONS = {
    "cn": {
        "fetch": fetch_holiday,
        "start_year": 2007,
        "subdir": None,
        "main_ics_name": "holiday-cn.ics",
        "cal_name": "中国法定节假日",
        "cal_desc": "中国法定节假日数据,自动每日抓取国务院公告。",
    },
    "hk": {
        "fetch": fetch_hk_holiday,
        "start_year": HK_START_YEAR,
        "subdir": "hk",
        "main_ics_name": "holiday-hk.ics",
        "cal_name": "香港公众假期",
        "cal_desc": "香港公众假期数据,来源:香港特别行政区政府 1823.gov.hk。",
    },
}
|
|
|
|
|
|
class ChinaTimezone(tzinfo):
    """Fixed UTC+8 timezone (China Standard Time); no daylight saving."""

    def tzname(self, dt):
        """Return the display name for this timezone."""
        return "UTC+8"

    def utcoffset(self, dt):
        """Return the constant +8 hour offset from UTC."""
        return timedelta(hours=8)

    def dst(self, dt):
        """Return a zero delta: DST is not observed in China."""
        return timedelta()
|
|
|
|
|
|
def _region_paths(region: str, year: int):
    """Return ``(json_path, ics_path, id_url)`` for *region* and *year*.

    Regions that store data in a subdirectory (e.g. ``hk/``) get that
    directory created on demand; region "cn" lives at the repo root.
    """
    subdir = REGIONS[region]["subdir"]
    if not subdir:
        return (
            workspace_path(f"{year}.json"),
            workspace_path(f"{year}.ics"),
            f"{GITHUB_RAW_BASE}/{year}.json",
        )
    os.makedirs(workspace_path(subdir), exist_ok=True)
    return (
        workspace_path(subdir, f"{year}.json"),
        workspace_path(subdir, f"{year}.ics"),
        f"{GITHUB_RAW_BASE}/{subdir}/{year}.json",
    )
|
|
|
|
|
|
def update_data(year: int, region: str = "cn") -> Iterator[str]:
    """Fetch and store one year of data for *region*.

    Writes the JSON data file and the per-year ICS calendar, yielding
    each file path right after it is written (JSON first, then ICS).
    """
    cfg = REGIONS[region]
    json_path, ics_path, id_url = _region_paths(region, year)

    data = cfg["fetch"](year)

    # Keep "$schema"/"$id" as the first keys of the stored document.
    document = {
        "$schema": SCHEMA_URL,
        "$id": id_url,
        **data,
    }
    with open(json_path, "w", encoding="utf-8", newline="\n") as f:
        json.dump(
            document,
            f,
            indent=4,
            ensure_ascii=False,
            cls=CustomJSONEncoder,
        )
    yield json_path

    generate_ics(
        data["days"],
        ics_path,
        cal_name=cfg["cal_name"],
        cal_desc=cfg["cal_desc"],
    )
    yield ics_path
|
|
|
|
|
|
def update_main_ics(fr_year: int, to_year: int, region: str = "cn"):
    """Merge per-year JSON files into the region's combined ICS calendar.

    Years without an existing JSON file are skipped silently.
    Returns the path of the generated ICS file.
    """
    cfg = REGIONS[region]
    subdir = cfg["subdir"]

    all_days = []
    for year in range(fr_year, to_year + 1):
        parts = ([subdir] if subdir else []) + [f"{year}.json"]
        json_path = workspace_path(*parts)
        if not os.path.isfile(json_path):
            continue
        with open(json_path, "r", encoding="utf8") as inf:
            year_data = json.load(inf)
        all_days.extend(year_data.get("days", []))

    out_path = workspace_path(cfg["main_ics_name"])
    generate_ics(
        all_days,
        out_path,
        cal_name=cfg["cal_name"],
        cal_desc=cfg["cal_desc"],
    )
    return out_path
|
|
|
|
|
|
def main():
    """Update holiday data for the selected region(s); optionally release.

    Steps: fetch/write per-year JSON + ICS files, regenerate the combined
    ICS per region, stage the files with git, and — when ``--release`` is
    given and something changed — commit, push, pack the data and create
    a GitHub release via ``gh``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--all",
        action="store_true",
        help="Update all years since each region's start year, default is this year and next year",
    )
    parser.add_argument(
        "--release",
        action="store_true",
        help="create new release if repository data is not up to date",
    )
    parser.add_argument(
        "--region",
        choices=list(REGIONS.keys()),
        default=None,
        help="Region to update (default: all regions)",
    )
    args = parser.parse_args()

    now = datetime.now(ChinaTimezone())
    is_release = args.release
    regions_to_update = list(REGIONS.keys()) if args.region is None else [args.region]

    filenames = []
    for region in regions_to_update:
        cfg = REGIONS[region]
        # Without --all, refresh only the current year onward — but never
        # before the region's first available year.
        year_start = cfg["start_year"] if args.all else max(cfg["start_year"], now.year)
        progress = tqdm(range(year_start, now.year + 2))
        for year in progress:
            progress.set_description(f"Updating {region} {year}")
            filenames += list(update_data(year, region))
        progress.set_description(f"Updating {cfg['main_ics_name']}")
        # Combined ICS covers a rolling window of recent + next year only.
        filenames.append(update_main_ics(now.year - 4, now.year + 1, region))

    print("")

    subprocess.run(["git", "add", *filenames], check=True)
    diff = subprocess.run(
        ["git", "diff", "--stat", "--cached"],
        check=True,
        stdout=subprocess.PIPE,
        encoding="utf-8",
    ).stdout
    if not diff:
        print("Already up to date.")
        return

    if not is_release:
        print("Updated repository data, skip release since not specified `--release`")
        return

    subprocess.run(
        [
            "git",
            "commit",
            "-m",
            "chore(release): update holiday data",
            "-m",
            "[skip ci]",
        ],
        check=True,
    )
    subprocess.run(["git", "push"], check=True)

    tag = now.strftime("%Y.%m.%d")
    temp_note_fd, temp_note_name = mkstemp()
    try:
        # Release note: tag followed by the staged diff stat.
        with open(temp_note_fd, "w", encoding="utf-8") as f:
            f.write(tag + "\n\n```diff\n" + diff + "\n```\n")

        os.makedirs(workspace_path("dist"), exist_ok=True)
        zip_path = workspace_path("dist", f"holiday-cn-{tag}.zip")
        pack_data(zip_path)

        subprocess.run(
            [
                "gh",
                "release",
                "create",
                "-F",
                temp_note_name,
                tag,
                f"{zip_path}#JSON数据",
            ],
            check=True,
        )
    finally:
        # Always remove the temporary note file; previously it leaked
        # whenever pack_data or `gh release create` raised.
        os.unlink(temp_note_name)
|
|
|
|
|
|
def pack_data(file):
    """Pack all region JSON data files into the zip archive *file*.

    Includes root-level ``{year}.json`` files (CN) plus each region
    subdirectory's ``{year}.json`` files (e.g. ``hk/{year}.json``).
    """
    # fullmatch so stray names such as "2024.json.bak" are excluded;
    # re.match only anchored at the start of the filename.
    pattern = re.compile(r"\d+\.json")
    # Context manager guarantees the archive is flushed and closed even on
    # error (the original never closed the ZipFile).
    with ZipFile(file, "w") as zip_file:
        # Root-level {year}.json files (CN)
        for name in os.listdir(workspace_path()):
            if pattern.fullmatch(name):
                zip_file.write(workspace_path(name), name)
        # Subdirectory region files (e.g. hk/{year}.json)
        for cfg in REGIONS.values():
            subdir = cfg["subdir"]
            if not subdir:
                continue
            subdir_path = workspace_path(subdir)
            if not os.path.isdir(subdir_path):
                continue
            for name in os.listdir(subdir_path):
                if pattern.fullmatch(name):
                    zip_file.write(workspace_path(subdir, name), f"{subdir}/{name}")
|
|
|
|
|
|
# Script entry point: run the updater when executed directly.
if __name__ == "__main__":
    main()
|