feat: check http status code
This commit is contained in:
parent
68d591d058
commit
b673f3770c
|
|
@ -46,6 +46,15 @@ PRE_PARSED_PAPERS = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _raise_for_status_200(resp: requests.Response):
    """Raise ``requests.HTTPError`` unless the response status is exactly 200.

    Args:
        resp: Response object to validate.

    Raises:
        requests.HTTPError: Raised by ``resp.raise_for_status()`` for error
            statuses, or raised here for any other status code that is
            not 200.
    """
    # Let requests report the error statuses it already knows how to describe.
    resp.raise_for_status()

    if resp.status_code == 200:
        return

    # Anything else that slipped through is still treated as a failure.
    message = "request failed: %s: %d" % (resp.request.url, resp.status_code)
    raise requests.HTTPError(message, response=resp)
|
||||||
|
|
||||||
|
|
||||||
def get_paper_urls(year: int) -> List[str]:
|
def get_paper_urls(year: int) -> List[str]:
|
||||||
"""Find year related paper urls.
|
"""Find year related paper urls.
|
||||||
|
|
||||||
|
|
@ -56,7 +65,7 @@ def get_paper_urls(year: int) -> List[str]:
|
||||||
List[str]: Urls, newest first.
|
List[str]: Urls, newest first.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
body = requests.get(
|
resp = requests.get(
|
||||||
SEARCH_URL,
|
SEARCH_URL,
|
||||||
params={
|
params={
|
||||||
"t": "paper",
|
"t": "paper",
|
||||||
|
|
@ -66,9 +75,10 @@ def get_paper_urls(year: int) -> List[str]:
|
||||||
"pcodeJiguan": "国办发明电",
|
"pcodeJiguan": "国办发明电",
|
||||||
"puborg": "国务院办公厅",
|
"puborg": "国务院办公厅",
|
||||||
},
|
},
|
||||||
).text
|
)
|
||||||
|
_raise_for_status_200(resp)
|
||||||
ret = re.findall(
|
ret = re.findall(
|
||||||
r'<li class="res-list".*?<a href="(.+?)".*?</li>', body, flags=re.S
|
r'<li class="res-list".*?<a href="(.+?)".*?</li>', resp.text, flags=re.S
|
||||||
)
|
)
|
||||||
ret = [i for i in ret if i not in PAPER_EXCLUDE]
|
ret = [i for i in ret if i not in PAPER_EXCLUDE]
|
||||||
ret += PAPER_INCLUDE.get(year, [])
|
ret += PAPER_INCLUDE.get(year, [])
|
||||||
|
|
@ -93,6 +103,7 @@ def get_paper(url: str) -> str:
|
||||||
), "Site changed, need human verify"
|
), "Site changed, need human verify"
|
||||||
|
|
||||||
response = requests.get(url)
|
response = requests.get(url)
|
||||||
|
_raise_for_status_200(response)
|
||||||
response.encoding = "utf-8"
|
response.encoding = "utf-8"
|
||||||
soup = bs4.BeautifulSoup(response.text, features="html.parser")
|
soup = bs4.BeautifulSoup(response.text, features="html.parser")
|
||||||
container = soup.find("td", class_="b12c")
|
container = soup.find("td", class_="b12c")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user