#!/usr/bin/env python3
"""Script for updating data.

Fetches holiday data per region, writes per-year JSON and ICS files,
regenerates the combined ICS calendar, and optionally commits / releases
the result via git and the GitHub CLI.
"""
import argparse
import json
import os
import re
import subprocess
from datetime import datetime, timedelta, tzinfo
from tempfile import mkstemp
from typing import Iterator
from zipfile import ZipFile

from tqdm import tqdm

from fetch import CustomJSONEncoder, fetch_holiday
from fetch_hk import HK_START_YEAR, fetch_hk_holiday
from generate_ics import generate_ics
from filetools import workspace_path

SCHEMA_URL = "https://raw.githubusercontent.com/NateScarlet/holiday-cn/master/schema.json"
GITHUB_RAW_BASE = "https://raw.githubusercontent.com/NateScarlet/holiday-cn/master"

# Per-region configuration:
#   fetch         - callable(year) returning the holiday data dict
#   start_year    - earliest year with available data
#   subdir        - workspace subdirectory for output files (None = repo root)
#   main_ics_name - filename of the combined multi-year calendar
#   cal_name/cal_desc - calendar metadata passed to generate_ics
REGIONS = {
    "cn": {
        "fetch": fetch_holiday,
        "start_year": 2007,
        "subdir": None,
        "main_ics_name": "holiday-cn.ics",
        "cal_name": "中国法定节假日",
        "cal_desc": "中国法定节假日数据,自动每日抓取国务院公告。",
    },
    "hk": {
        "fetch": fetch_hk_holiday,
        "start_year": HK_START_YEAR,
        "subdir": "hk",
        "main_ics_name": "holiday-hk.ics",
        "cal_name": "香港公众假期",
        "cal_desc": "香港公众假期数据,来源:香港特别行政区政府 1823.gov.hk。",
    },
}


class ChinaTimezone(tzinfo):
    """Timezone of china (fixed UTC+8, no DST)."""

    def tzname(self, dt):
        return "UTC+8"

    def utcoffset(self, dt):
        return timedelta(hours=8)

    def dst(self, dt):
        return timedelta()


def _region_paths(region: str, year: int):
    """Return (json_path, ics_path, id_url) for a region and year.

    Creates the region subdirectory on demand; regions with no subdir
    (CN) write to the workspace root.
    """
    subdir = REGIONS[region]["subdir"]
    if subdir:
        os.makedirs(workspace_path(subdir), exist_ok=True)
        json_path = workspace_path(subdir, f"{year}.json")
        ics_path = workspace_path(subdir, f"{year}.ics")
        id_url = f"{GITHUB_RAW_BASE}/{subdir}/{year}.json"
    else:
        json_path = workspace_path(f"{year}.json")
        ics_path = workspace_path(f"{year}.ics")
        id_url = f"{GITHUB_RAW_BASE}/{year}.json"
    return json_path, ics_path, id_url


def update_data(year: int, region: str = "cn") -> Iterator[str]:
    """Update and store data for a year and region.

    Yields the paths of the files written (JSON first, then ICS) so the
    caller can stage them with git.
    """
    cfg = REGIONS[region]
    json_path, ics_path, id_url = _region_paths(region, year)
    data = cfg["fetch"](year)
    with open(json_path, "w", encoding="utf-8", newline="\n") as f:
        # "$schema"/"$id" come first; keys from `data` win on collision,
        # matching dict-literal later-key-wins semantics.
        json.dump(
            {
                "$schema": SCHEMA_URL,
                "$id": id_url,
                **data,
            },
            f,
            indent=4,
            ensure_ascii=False,
            cls=CustomJSONEncoder,
        )
    yield json_path
    generate_ics(
        data["days"],
        ics_path,
        cal_name=cfg["cal_name"],
        cal_desc=cfg["cal_desc"],
    )
    yield ics_path


def update_main_ics(fr_year: int, to_year: int, region: str = "cn"):
    """Regenerate the combined calendar covering fr_year..to_year (inclusive).

    Years whose JSON file is missing are silently skipped.
    Returns the path of the ICS file written.
    """
    cfg = REGIONS[region]
    subdir = cfg["subdir"]
    all_days = []
    for year in range(fr_year, to_year + 1):
        if subdir:
            filename = workspace_path(subdir, f"{year}.json")
        else:
            filename = workspace_path(f"{year}.json")
        if not os.path.isfile(filename):
            continue
        with open(filename, "r", encoding="utf-8") as inf:
            data = json.load(inf)
        all_days.extend(data.get("days", []))

    filename = workspace_path(cfg["main_ics_name"])
    generate_ics(
        all_days,
        filename,
        cal_name=cfg["cal_name"],
        cal_desc=cfg["cal_desc"],
    )
    return filename


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--all",
        action="store_true",
        help="Update all years since each region's start year, default is this year and next year",
    )
    parser.add_argument(
        "--release",
        action="store_true",
        help="create new release if repository data is not up to date",
    )
    parser.add_argument(
        "--region",
        choices=list(REGIONS.keys()),
        default=None,
        help="Region to update (default: all regions)",
    )
    args = parser.parse_args()
    now = datetime.now(ChinaTimezone())
    is_release = args.release
    regions_to_update = list(REGIONS.keys()) if args.region is None else [args.region]

    filenames = []
    for region in regions_to_update:
        cfg = REGIONS[region]
        # Default run covers this year and next; --all goes back to start_year.
        year_start = cfg["start_year"] if args.all else max(cfg["start_year"], now.year)
        progress = tqdm(range(year_start, now.year + 2))
        for year in progress:
            progress.set_description(f"Updating {region} {year}")
            filenames += list(update_data(year, region))

        progress.set_description(f"Updating {cfg['main_ics_name']}")
        filenames.append(update_main_ics(now.year - 4, now.year + 1, region))
        print("")

    subprocess.run(["git", "add", *filenames], check=True)
    diff = subprocess.run(
        ["git", "diff", "--stat", "--cached"],
        check=True,
        stdout=subprocess.PIPE,
        encoding="utf-8",
    ).stdout
    if not diff:
        print("Already up to date.")
        return
    if not is_release:
        print("Updated repository data, skip release since not specified `--release`")
        return

    subprocess.run(
        [
            "git",
            "commit",
            "-m",
            "chore(release): update holiday data",
            "-m",
            "[skip ci]",
        ],
        check=True,
    )
    subprocess.run(["git", "push"], check=True)

    tag = now.strftime("%Y.%m.%d")
    temp_note_fd, temp_note_name = mkstemp()
    # Opening by fd ensures the descriptor from mkstemp is closed with the file.
    with open(temp_note_fd, "w", encoding="utf-8") as f:
        f.write(tag + "\n\n```diff\n" + diff + "\n```\n")

    os.makedirs(workspace_path("dist"), exist_ok=True)
    zip_path = workspace_path("dist", f"holiday-cn-{tag}.zip")
    pack_data(zip_path)
    try:
        subprocess.run(
            [
                "gh",
                "release",
                "create",
                "-F",
                temp_note_name,
                tag,
                f"{zip_path}#JSON数据",
            ],
            check=True,
        )
    finally:
        # Remove the note file even when `gh release create` fails,
        # so failed runs do not leak temp files.
        os.unlink(temp_note_name)


def pack_data(file):
    """Pack all region JSON data into a zip file."""
    # Context manager guarantees the archive is flushed and closed
    # (the zip central directory is written on close).
    with ZipFile(file, "w") as zip_file:
        # Root-level {year}.json files (CN). fullmatch anchors both ends so
        # names like "2023.json.bak" are not packed.
        for name in os.listdir(workspace_path()):
            if re.fullmatch(r"\d+\.json", name):
                zip_file.write(workspace_path(name), name)
        # Subdirectory region files (e.g. hk/{year}.json)
        for cfg in REGIONS.values():
            subdir = cfg["subdir"]
            if not subdir:
                continue
            subdir_path = workspace_path(subdir)
            if not os.path.isdir(subdir_path):
                continue
            for name in os.listdir(subdir_path):
                if re.fullmatch(r"\d+\.json", name):
                    zip_file.write(workspace_path(subdir, name), f"{subdir}/{name}")


if __name__ == "__main__":
    main()