1.如何处理数据

BeautifulSoup

解析数据
提取数据

pip install beautifulsoup4

2.案例

2.1获取豆瓣图书TOP250当中的书名和链接

# -*- coding: utf-8 -*-
import requests
# 引入 BeautifulSoup
from bs4 import BeautifulSoup

# Fetch the Douban Books Top 250 page and print each book's title and link.

# Send a desktop-browser User-Agent instead of the requests default.
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
}
# requests applies no timeout by default; set one so a stalled connection
# cannot hang the script forever.
res = requests.get('https://book.douban.com/top250', headers=headers, timeout=10)
# Fail loudly on an HTTP error status instead of parsing an error page and
# silently printing nothing.
res.raise_for_status()
# First argument is the page source, second argument is the parser to use.
soup = BeautifulSoup(res.text, 'html.parser')
# CSS selector: every <a> nested inside a <div class="pl2">.
# (Equivalent alternative: soup.find_all('div', class_='pl2') followed by
# .find('a') on each result.)
items = soup.select('div.pl2 a')
for anchor in items:
    # .get() returns None for a missing attribute instead of raising KeyError.
    name = anchor.get('title')
    link = anchor.get('href')
    if not name or not link:
        # Skip anchors that do not carry both a title and a link.
        continue
    print(name, link)

2.2获取豆瓣电影的数据(电影名和链接)

# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup

# Fetch the Douban Movies Top 250 page and print each movie's name and link.

# Send a desktop-browser User-Agent instead of the requests default.
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
}

# requests applies no timeout by default; set one so a stalled connection
# cannot hang the script forever.
res = requests.get('https://movie.douban.com/top250', headers=headers, timeout=10)
# Fail loudly on an HTTP error status instead of parsing an error page and
# silently printing nothing.
res.raise_for_status()
soup = BeautifulSoup(res.text, 'html.parser')

# CSS selector: every <a> nested inside a <div class="hd">.
items = soup.select('div.hd a')
for anchor in items:
    link = anchor.get('href')
    # find() returns only the first descendant with class "title", or None
    # when the anchor has no such element.
    tag = anchor.find(class_='title')
    if tag is None or not link:
        # Skip anchors without a title element or a link rather than crashing.
        continue
    name = tag.text
    print(name, link)