#python bs4库获取百度网首页上的文字内容
import requests
from bs4 import BeautifulSoup
# Download the Baidu home page with requests and parse it with Beautiful Soup.
url = "https://www.baidu.com"
# A timeout keeps the script from hanging forever if the server never answers,
# and raise_for_status() stops early on an HTTP error instead of silently
# parsing an error page.
response = requests.get(url, timeout=10)
response.raise_for_status()
# Decode the raw bytes explicitly as UTF-8. `response.text` is the alternative;
# it decodes with whatever encoding requests guesses for the response.
res = response.content.decode("utf8")
print(res)
# Build the parse tree with the html5lib parser (needs the html5lib package).
soup = BeautifulSoup(res, "html5lib")
# Note: `soup.a.string` would give the text of the first <a> tag only.
# Print the text of every <a class="mnav"> link, one per line.
print("## 读取a,class=mnav标签文本")
results = soup.find_all('a', {'class': 'mnav'})
for result in results:
    # get_text() returns the same text as .string for a plain link, but still
    # works when the tag contains nested markup (where .string is None).
    print(result.get_text())
# Collect the text of every <a class="mnav"> link into the list `name`,
# then print the finished list once.
print("## 读取a,class=mnav标签文本,放入name列表中")
items = soup.find_all("a", class_="mnav")
# get_text() always yields a str, so the list never contains None even when a
# link wraps nested markup (where .string would be None).
name = [item.get_text() for item in items]
print(name)
# Extract link text with a regular expression applied to each tag's HTML.
print("## 正则表达式方法html中读取文本 ")
import re

# Non-greedy capture of whatever sits between a '>' and the next '<'.
# Compiled once here because the pattern is the same for every link.
pattern = re.compile(r'>(.*?)<')
items = soup.find_all("a", class_="mnav")
for item in items:
    # str(item) renders the tag back to its HTML markup; findall returns the
    # list of captured text fragments found in that markup.
    result = pattern.findall(str(item))
    print(result)