Files
holiday-cn/scripts/update.py
mingsheng.li 05a1dca074 feat: add Hong Kong public holiday support
- Add scripts/fetch_hk.py to fetch HK holidays from 1823.gov.hk
- Make generate_ics() cal_name/cal_desc configurable
- Refactor update.py with REGIONS config; support --region cn|hk flag
- Generate hk/{year}.json and hk/{year}.ics under hk/ subdirectory

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-27 12:09:34 +08:00

240 lines
6.8 KiB
Python

#!/usr/bin/env python3
"""Script for updating data. """
import argparse
import json
import os
import re
import subprocess
from datetime import datetime, timedelta, tzinfo
from tempfile import mkstemp
from typing import Iterator
from zipfile import ZipFile
from tqdm import tqdm
from fetch import CustomJSONEncoder, fetch_holiday
from fetch_hk import HK_START_YEAR, fetch_hk_holiday
from generate_ics import generate_ics
from filetools import workspace_path
# JSON Schema URL that every generated data file declares via its "$schema" key.
SCHEMA_URL = "https://raw.githubusercontent.com/NateScarlet/holiday-cn/master/schema.json"
# Base URL used to build each data file's "$id" permalink.
GITHUB_RAW_BASE = "https://raw.githubusercontent.com/NateScarlet/holiday-cn/master"
# Per-region configuration table.
#   fetch:          callable(year) -> data dict containing at least a "days" list
#                   (consumed by update_data / generate_ics).
#   start_year:     first year with available data for the region.
#   subdir:         output subdirectory under the workspace root, or None to
#                   write at the repository root (the original CN layout).
#   main_ics_name:  filename of the combined multi-year ICS calendar.
#   cal_name / cal_desc: calendar metadata forwarded to generate_ics().
REGIONS = {
    "cn": {
        "fetch": fetch_holiday,
        "start_year": 2007,
        "subdir": None,
        "main_ics_name": "holiday-cn.ics",
        "cal_name": "中国法定节假日",
        "cal_desc": "中国法定节假日数据,自动每日抓取国务院公告。",
    },
    "hk": {
        "fetch": fetch_hk_holiday,
        "start_year": HK_START_YEAR,
        "subdir": "hk",
        "main_ics_name": "holiday-hk.ics",
        "cal_name": "香港公众假期",
        "cal_desc": "香港公众假期数据,来源:香港特别行政区政府 1823.gov.hk。",
    },
}
class ChinaTimezone(tzinfo):
    """Fixed UTC+8 timezone (China Standard Time, no daylight saving)."""

    # Constant offset shared by all instances; timedelta is immutable.
    _OFFSET = timedelta(hours=8)

    def tzname(self, dt):
        """Return the display name of this timezone."""
        return "UTC+8"

    def utcoffset(self, dt):
        """Return the constant +8 hour offset from UTC."""
        return self._OFFSET

    def dst(self, dt):
        """Return a zero delta: no daylight saving time applies."""
        return timedelta()
def _region_paths(region: str, year: int):
    """Resolve output locations for one region and year.

    Returns a ``(json_path, ics_path, id_url)`` tuple.  Regions that
    publish under a subdirectory get that directory created on demand;
    root-level regions (``subdir`` is None) write beside the script's
    workspace root.
    """
    subdir = REGIONS[region]["subdir"]
    if not subdir:
        return (
            workspace_path(f"{year}.json"),
            workspace_path(f"{year}.ics"),
            f"{GITHUB_RAW_BASE}/{year}.json",
        )
    os.makedirs(workspace_path(subdir), exist_ok=True)
    return (
        workspace_path(subdir, f"{year}.json"),
        workspace_path(subdir, f"{year}.ics"),
        f"{GITHUB_RAW_BASE}/{subdir}/{year}.json",
    )
def update_data(year: int, region: str = "cn") -> Iterator[str]:
    """Fetch and store holiday data for one region and year.

    Yields the written JSON path first, then the generated ICS path.
    """
    cfg = REGIONS[region]
    json_path, ics_path, id_url = _region_paths(region, year)
    fetched = cfg["fetch"](year)
    # "$schema" and "$id" lead the document; the fetched keys follow in order.
    document = {"$schema": SCHEMA_URL, "$id": id_url, **fetched}
    with open(json_path, "w", encoding="utf-8", newline="\n") as out:
        json.dump(
            document,
            out,
            indent=4,
            ensure_ascii=False,
            cls=CustomJSONEncoder,
        )
    yield json_path
    generate_ics(
        fetched["days"],
        ics_path,
        cal_name=cfg["cal_name"],
        cal_desc=cfg["cal_desc"],
    )
    yield ics_path
def update_main_ics(fr_year: int, to_year: int, region: str = "cn"):
    """Merge per-year JSON files into the region's combined ICS calendar.

    Args:
        fr_year: First year (inclusive) to include.
        to_year: Last year (inclusive) to include.
        region: Key into ``REGIONS``.

    Returns:
        Path of the generated combined ICS file.

    Years whose JSON file does not exist are silently skipped, so the range
    may safely extend before a region's first available year.
    """
    cfg = REGIONS[region]
    subdir = cfg["subdir"]
    all_days = []
    for year in range(fr_year, to_year + 1):
        if subdir:
            filename = workspace_path(subdir, f"{year}.json")
        else:
            filename = workspace_path(f"{year}.json")
        if not os.path.isfile(filename):
            continue
        # json.load streams from the file object directly; "utf-8" spelling
        # matches the encoding used everywhere else in this file.
        with open(filename, "r", encoding="utf-8") as inf:
            data = json.load(inf)
        all_days.extend(data.get("days", []))
    filename = workspace_path(cfg["main_ics_name"])
    generate_ics(
        all_days,
        filename,
        cal_name=cfg["cal_name"],
        cal_desc=cfg["cal_desc"],
    )
    return filename
def main():
    """Update holiday data for the selected regions and optionally release.

    Workflow: regenerate per-year JSON/ICS files, rebuild each region's
    combined ICS, stage everything with git, and — when ``--release`` is
    given and data changed — commit, push, pack a zip and publish a GitHub
    release via ``gh``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--all",
        action="store_true",
        help="Update all years since each region's start year, default is this year and next year",
    )
    parser.add_argument(
        "--release",
        action="store_true",
        help="create new release if repository data is not up to date",
    )
    parser.add_argument(
        "--region",
        choices=list(REGIONS.keys()),
        default=None,
        help="Region to update (default: all regions)",
    )
    args = parser.parse_args()
    now = datetime.now(ChinaTimezone())
    is_release = args.release
    regions_to_update = list(REGIONS.keys()) if args.region is None else [args.region]
    filenames = []
    for region in regions_to_update:
        cfg = REGIONS[region]
        # Without --all only refresh the current and next year, but never
        # start before the region's first available year.
        year_start = cfg["start_year"] if args.all else max(cfg["start_year"], now.year)
        progress = tqdm(range(year_start, now.year + 2))
        for year in progress:
            progress.set_description(f"Updating {region} {year}")
            filenames += list(update_data(year, region))
        progress.set_description(f"Updating {cfg['main_ics_name']}")
        # Combined calendar covers a rolling five-year window plus next year.
        filenames.append(update_main_ics(now.year - 4, now.year + 1, region))
    print("")
    subprocess.run(["git", "add", *filenames], check=True)
    diff = subprocess.run(
        ["git", "diff", "--stat", "--cached"],
        check=True,
        stdout=subprocess.PIPE,
        encoding="utf-8",
    ).stdout
    if not diff:
        print("Already up to date.")
        return
    if not is_release:
        print("Updated repository data, skip release since not specified `--release`")
        return
    subprocess.run(
        [
            "git",
            "commit",
            "-m",
            "chore(release): update holiday data",
            "-m",
            "[skip ci]",
        ],
        check=True,
    )
    subprocess.run(["git", "push"], check=True)
    tag = now.strftime("%Y.%m.%d")
    temp_note_fd, temp_note_name = mkstemp()
    try:
        with open(temp_note_fd, "w", encoding="utf-8") as f:
            f.write(tag + "\n\n```diff\n" + diff + "\n```\n")
        os.makedirs(workspace_path("dist"), exist_ok=True)
        zip_path = workspace_path("dist", f"holiday-cn-{tag}.zip")
        pack_data(zip_path)
        subprocess.run(
            [
                "gh",
                "release",
                "create",
                "-F",
                temp_note_name,
                tag,
                f"{zip_path}#JSON数据",
            ],
            check=True,
        )
    finally:
        # Remove the note file even when packing or `gh release create`
        # fails; previously it was only unlinked on success, leaking a
        # temp file per failed release attempt.
        os.unlink(temp_note_name)
def pack_data(file):
    """Pack all region JSON data into a zip file.

    Args:
        file: Destination path (or file object) for the archive.
    """
    # Context manager guarantees the archive is finalized and closed even if
    # a write fails; the original left the ZipFile unclosed.
    with ZipFile(file, "w") as zip_file:
        # Root-level {year}.json files (CN).  fullmatch rejects names such as
        # "2024.json.bak" that the original unanchored re.match accepted.
        for name in os.listdir(workspace_path()):
            if re.fullmatch(r"\d+\.json", name):
                zip_file.write(workspace_path(name), name)
        # Subdirectory region files (e.g. hk/{year}.json).
        for cfg in REGIONS.values():
            subdir = cfg["subdir"]
            if not subdir:
                continue
            subdir_path = workspace_path(subdir)
            if not os.path.isdir(subdir_path):
                continue
            for name in os.listdir(subdir_path):
                if re.fullmatch(r"\d+\.json", name):
                    zip_file.write(workspace_path(subdir, name), f"{subdir}/{name}")
# Standard entry guard: run the updater only when executed as a script.
if __name__ == "__main__":
    main()