def get_html_text(url):
    """Download ``url`` and return the page's HTML as text.

    Before decoding, the response encoding is replaced with the encoding
    detected from the body (``apparent_encoding``).

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or an empty string if the request fails
        (connection error, timeout, invalid URL or a 4xx/5xx status).
    """
    # Imported locally so the function works even when the module-level
    # import header is missing or incomplete.
    import requests

    try:
        res = requests.get(url, timeout=6)
        res.raise_for_status()
        res.encoding = res.apparent_encoding
        return res.text
    except requests.RequestException:
        # Best-effort fetch: a failed request is reported as an empty string
        # rather than an exception.
        return ''
def fill_ulist(ulist, html):
    """Extract ranking rows from ``html`` and append them to ``ulist``.

    Only the first ``<tbody>`` element of the document is examined. For each
    tag that is a direct child of it, the text of the first four ``<td>``
    cells is appended to ``ulist`` as a four-item list.

    Args:
        ulist: List that receives one ``[col0, col1, col2, col3]`` entry per
            table row; it is modified in place.
        html: HTML source of the page.

    Returns:
        None. If the document has no ``<tbody>``, ``ulist`` is left unchanged.
    """
    # Imported locally so the function works even when the module-level
    # import header is missing or incomplete.
    from bs4 import BeautifulSoup
    from bs4.element import Tag

    # Parse the HTML and locate the first <tbody> element.
    soup = BeautifulSoup(html, 'html.parser')
    tbody = soup.tbody
    if tbody is None:
        # Nothing to extract (e.g. empty or unexpected page).
        return

    # The children of <tbody> are its <tr> tags interleaved with strings.
    for tr in tbody.children:
        # Skip the plain strings between rows.
        if not isinstance(tr, Tag):
            continue
        cells = tr.find_all('td')
        if len(cells) < 4:
            # Not a full data row; indexing the first four cells would fail.
            continue
        row = []
        for cell in cells[:4]:
            text = cell.string
            if text is None:
                # .string is None when the cell holds more than one child;
                # fall back to the concatenated text of the cell.
                text = cell.get_text(strip=True)
            row.append(str(text))
        ulist.append(row)
def display_urank(ulist):
    """Print the university ranking as a tab-separated, centred table.

    A header line is printed first, followed by one line per entry.

    Args:
        ulist: Iterable of rows; the first four items of each row are
            printed as rank, university name, province/city and total
            score. ``None`` items are printed as empty text.
    """
    # chr(12288) is the full-width (CJK) space. Padding the Chinese columns
    # with it instead of an ASCII space keeps them visually aligned.
    ocp = chr(12288)
    tplt = "{:^5}\t{:{ocp}^12}\t{:{ocp}^5}\t{:^5}"
    print(tplt.format("排名", "大学名称", "省市", "总分", ocp=ocp))
    for u in ulist:
        # Formatting None with an alignment spec raises TypeError, so every
        # cell is turned into text first.
        cols = ['' if value is None else str(value) for value in u[:4]]
        print(tplt.format(*cols, ocp=ocp))
def write_in_file(ulist, file_path):
    """Write the university ranking to a text file.

    The file is created or overwritten. It contains a title line, a blank
    line, a header line and then one line per entry.

    Args:
        ulist: Iterable of rows; the first four items of each row are
            written as rank, university name, province/city and total
            score. ``None`` items are written as empty text.
        file_path: Path of the file to write.
    """
    # chr(12288) is the full-width (CJK) space. Padding the Chinese columns
    # with it instead of an ASCII space keeps them visually aligned.
    ocp = chr(12288)
    tplt = "{:^5}\t{:{ocp}^12}\t{:{ocp}^5}\t{:^5}\n"
    # The output is Chinese text, so the encoding is set explicitly instead
    # of relying on the platform default.
    with open(file_path, 'w', encoding='utf-8') as file_object:
        file_object.write('软科中国最好大学排名2019版:\n\n')
        # The template has four positional fields, so all four must be given.
        file_object.write(tplt.format("排名", "大学名称", "省市", "总分", ocp=ocp))
        for u in ulist:
            # Formatting None with an alignment spec raises TypeError, so
            # every cell is turned into text first.
            cols = ['' if value is None else str(value) for value in u[:4]]
            file_object.write(tplt.format(*cols, ocp=ocp))
def main():
    """Download the 2019 ranking page, print the table and save it to a file."""
    ulist = []
    url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html'
    file_path = 'university rankings.txt'
    html = get_html_text(url)
    if not html:
        # get_html_text returns '' when the download fails; there is nothing
        # to parse, print or save in that case.
        print('Failed to download {}'.format(url))
        return
    fill_ulist(ulist, html)
    display_urank(ulist)
    write_in_file(ulist, file_path)


# Run only when executed as a script, so importing this module does not
# trigger a network request and a file write.
if __name__ == '__main__':
    main()