fix: follow site DOM change

This commit is contained in:
NateScarlet
2023-05-15 01:58:42 +08:00
parent 868d0eb712
commit d1369f1c05
2 changed files with 19 additions and 1 deletion

View File

@@ -130,7 +130,7 @@ def get_paper(url: str) -> str:
soup = bs4.BeautifulSoup(response.text, features="html.parser")
container = soup.find(id="UCAP-CONTENT")
assert container, f"Can not get paper container from url: {url}"
ret = container.get_text().replace("\u3000\u3000", "\n")
ret = "\n".join((i.get_text() for i in container.find_all("p")))
assert ret, f"Can not get paper content from url: {url}"
return ret