使用 bs4 爬取贵州农产品价格数据
一、爬取数据步骤
1、爬取网站地址：http://www.gznw.gov.cn/priceInfo/getPriceInfoByAreaId.jx?areaid=22572&page=1
2、实现代码
import requests
from bs4 import BeautifulSoup


class Food(object):
    """Scrape the product price table served by www.gznw.gov.cn.

    The target page and request headers are fixed in ``__init__``;
    ``down_data`` is the entry point that fetches and parses the page.
    """

    # Seconds to wait for the server. Without a timeout, requests.get()
    # may block forever on an unresponsive host.
    timeout = 10

    def __init__(self):
        """Set the page URL and the browser-like headers sent with it."""
        self.url = 'http://www.gznw.gov.cn/priceInfo/getPriceInfoByAreaId.jx?areaid=22572&page=1'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36',
        }

    @property
    def get_html(self):
        """Fetch the page and return its body text.

        Kept as a property for backward compatibility; note that every
        access performs a new HTTP request.

        :return: the response text when the status code is 200,
            otherwise an empty string.
        :raises requests.RequestException: on connection errors or when
            the server does not answer within ``self.timeout`` seconds.
        """
        response = requests.get(
            url=self.url,
            headers=self.headers,
            timeout=self.timeout,
        )
        if response.status_code == 200:
            return response.text
        return ''

    def down_data(self):
        """Download the page and extract one dict per table row.

        For each ``<tr>`` in the ``<tbody>`` of the table with class
        ``table table-hover``, the text of cells 0, 1, 3 and 4 is stored
        under the keys ``name``, ``price``, ``address`` and ``time``.

        :return: a list of dicts; empty when the page could not be
            fetched or does not contain the expected table.
        """
        html = self.get_html
        if not html:
            # Non-200 response: there is nothing to parse.
            return []

        soup = BeautifulSoup(html, 'lxml')
        table = soup.find('table', attrs={'class': 'table table-hover'})
        tbody = table.find('tbody') if table is not None else None
        if tbody is None:
            # Page layout differs from what is expected; report no data
            # instead of failing with an AttributeError on None.
            return []

        food_list = []
        for tr in tbody.find_all('tr'):
            tds = tr.find_all('td')
            if len(tds) < 5:
                # Rows without all five cells cannot be indexed safely.
                continue
            food_list.append({
                'name': tds[0].get_text(),
                'price': tds[1].get_text(),
                'address': tds[3].get_text(),
                'time': tds[4].get_text(),
            })
        return food_list


if __name__ == "__main__":
    foo = Food()
    print(foo.down_data())