Powered by GitBook

使用 bs4 爬取贵州农产品价格信息

一、爬取数据步骤

1、爬取网站地址：http://www.gznw.gov.cn/priceInfo/getPriceInfoByAreaId.jx?areaid=22572&page=1

2、实现代码

import requests
from bs4 import BeautifulSoup

class Food(object):
    """Scraper for the price table served by www.gznw.gov.cn.

    The instance holds the page URL and the request headers; ``down_data``
    fetches the page and turns each table row into a dict.
    """

    # Seconds to wait for the server before giving up. Without a timeout
    # ``requests`` would block indefinitely on a stalled connection.
    TIMEOUT = 10

    def __init__(self):
        self.url = 'http://www.gznw.gov.cn/priceInfo/getPriceInfoByAreaId.jx?areaid=22572&page=1'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36',
        }

    @property
    def get_html(self):
        """
        Fetch the page at ``self.url``.

        :return: the response body as text when the status code is 200,
            otherwise an empty string.
        :raises requests.RequestException: on connection errors or when the
            server does not answer within ``TIMEOUT`` seconds.
        """
        response = requests.get(
            url=self.url,
            headers=self.headers,
            timeout=self.TIMEOUT,
        )
        if response.status_code == 200:
            return response.text
        return ''

    def down_data(self):
        """
        Download the page and extract the rows of the price table.

        :return: a list of dicts with the keys ``name``, ``price``,
            ``address`` and ``time``, one per table row. The list is empty
            when the page could not be fetched or does not contain the
            expected table.
        """
        html = self.get_html
        if not html:
            # Non-200 response: there is nothing to parse.
            return []

        soup = BeautifulSoup(html, 'lxml')
        table = soup.find('table', attrs={'class': 'table table-hover'})
        if table is None:
            return []
        tbody = table.find('tbody')
        if tbody is None:
            return []

        food_list = []
        for tr in tbody.find_all('tr'):
            tds = tr.find_all('td')
            # Rows without the full set of cells (e.g. header or spacer
            # rows) cannot be mapped onto the record layout; skip them.
            if len(tds) < 5:
                continue
            # The cell at index 2 is intentionally not part of the record.
            food_list.append({
                'name': tds[0].get_text(),
                'price': tds[1].get_text(),
                'address': tds[3].get_text(),
                'time': tds[4].get_text(),
            })
        return food_list

if __name__ == "__main__":
    # Run the scraper once and print the extracted records.
    scraper = Food()
    records = scraper.down_data()
    print(records)

results matching ""

No results matching ""