fix: follow site DOM change
This commit is contained in:
parent
43ade74e48
commit
868d0eb712
|
|
@@ -128,7 +128,7 @@ def get_paper(url: str) -> str:
|
||||||
_raise_for_status_200(response)
|
_raise_for_status_200(response)
|
||||||
response.encoding = "utf-8"
|
response.encoding = "utf-8"
|
||||||
soup = bs4.BeautifulSoup(response.text, features="html.parser")
|
soup = bs4.BeautifulSoup(response.text, features="html.parser")
|
||||||
container = soup.find("td", class_="b12c")
|
container = soup.find(id="UCAP-CONTENT")
|
||||||
assert container, f"Can not get paper container from url: {url}"
|
assert container, f"Can not get paper container from url: {url}"
|
||||||
ret = container.get_text().replace("\u3000\u3000", "\n")
|
ret = container.get_text().replace("\u3000\u3000", "\n")
|
||||||
assert ret, f"Can not get paper content from url: {url}"
|
assert ret, f"Can not get paper content from url: {url}"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user