From d1369f1c05b64db1135bc38f61aa538ff53c37da Mon Sep 17 00:00:00 2001
From: NateScarlet <NateScarlet@Gmail.com>
Date: Mon, 15 May 2023 01:58:42 +0800
Subject: [PATCH] fix: follow site dom change

---
 scripts/fetch.py      |  2 +-
 scripts/fetch_test.py | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/scripts/fetch.py b/scripts/fetch.py
index 88b3e4d..85ad881 100644
--- a/scripts/fetch.py
+++ b/scripts/fetch.py
@@ -130,7 +130,7 @@ def get_paper(url: str) -> str:
     soup = bs4.BeautifulSoup(response.text, features="html.parser")
     container = soup.find(id="UCAP-CONTENT")
     assert container, f"Can not get paper container from url: {url}"
-    ret = container.get_text().replace("\u3000\u3000", "\n")
+    ret = "\n".join((i.get_text() for i in container.find_all("p")))
     assert ret, f"Can not get paper content from url: {url}"
     return ret
 
diff --git a/scripts/fetch_test.py b/scripts/fetch_test.py
index 3f5b7f8..7c5496a 100644
--- a/scripts/fetch_test.py
+++ b/scripts/fetch_test.py
@@ -31,6 +31,24 @@ def test_get_rules():
     ) == [("劳动节", "2019年5月1日至4日放假调休，共4天。4月28日（星期日）、5月5日（星期日）上班。")]
 
 
+def test_get_rules_2023():
+    """Check the rules parsed from the 2023 holiday notice page."""
+    # get_paper is handed a live URL, so this test presumably needs network access.
+    url = "http://www.gov.cn/zhengce/content/2022-12/08/content_5730844.htm"
+    paper = get_paper(url)
+    parsed = list(get_rules(paper))
+    # (holiday name, arrangement text) pairs; list equality makes the order matter.
+    expected = [
+        ("元旦", "2022年12月31日至2023年1月2日放假调休，共3天。"),
+        ("春节", "1月21日至27日放假调休，共7天。1月28日（星期六）、1月29日（星期日）上班。"),
+        ("清明节", "4月5日放假，共1天。"),
+        ("劳动节", "4月29日至5月3日放假调休，共5天。4月23日（星期日）、5月6日（星期六）上班。"),
+        ("端午节", "6月22日至24日放假调休，共3天。6月25日（星期日）上班。"),
+        ("中秋节、国庆节", "9月29日至10月6日放假调休，共8天。10月7日（星期六）、10月8日（星期日）上班。"),
+    ]
+    assert parsed == expected
+
+
 def _normalize(iterable):
     return sorted(
         json.loads(json.dumps(list(iterable), cls=CustomJSONEncoder)),