使用 bs4 爬取贵州农产品价格数据

一、爬取数据步骤

  • 1、爬取网站地址：http://www.gznw.gov.cn/priceInfo/getPriceInfoByAreaId.jx?areaid=22572&page=1

  • 2、实现代码

    import requests
    from bs4 import BeautifulSoup
    
    class Food(object):
        """Scraper for the Guizhou agricultural product price list.

        Downloads one page of the price table served by ``self.url`` and turns
        every data row into a dict with ``name``, ``price``, ``address`` and
        ``time`` keys.
        """

        def __init__(self, timeout=10):
            """
            Store the request target, headers and timeout.

            :param timeout: seconds to wait for the server before ``requests``
                gives up; without a timeout a stalled server would block the
                call forever.
            """
            self.url = 'http://www.gznw.gov.cn/priceInfo/getPriceInfoByAreaId.jx?areaid=22572&page=1'
            self.headers = {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36',
            }
            self.timeout = timeout

        @property
        def get_html(self):
            """
            Fetch the page at ``self.url``.

            :return: the response body as text, or ``''`` when the HTTP status
                is not 200.
            :raises requests.RequestException: on connection failure or timeout.
            """
            response = requests.get(
                url=self.url,
                headers=self.headers,
                timeout=self.timeout,
            )
            if response.status_code == 200:
                return response.text
            return ''

        def down_data(self):
            """
            Download the page and extract the price rows.

            :return: a list of dicts, one per table row, with the keys
                ``name``, ``price``, ``address`` and ``time``; an empty list
                when the page could not be fetched or holds no price table.
            """
            html = self.get_html
            # get_html signals a failed fetch with '', so there is nothing to parse.
            if not html:
                return []

            soup = BeautifulSoup(html, 'lxml')
            table = soup.find('table', attrs={'class': 'table table-hover'})
            tbody = table.find('tbody') if table is not None else None
            # find() returns None when the element is absent; treat that as "no data".
            if tbody is None:
                return []

            food_list = []
            for tr in tbody.find_all('tr'):
                tds = tr.find_all('td')
                # Cells 0, 1, 3 and 4 are read below, so skip rows that are too
                # short (e.g. header or spacer rows) instead of raising IndexError.
                if len(tds) < 5:
                    continue
                food_list.append({
                    'name': tds[0].get_text(),
                    'price': tds[1].get_text(),
                    'address': tds[3].get_text(),
                    'time': tds[4].get_text(),
                })
            return food_list
    
    if __name__ == "__main__":
        # Script entry point: scrape one page and print the parsed rows.
        print(Food().down_data())
    

results matching ""

    No results matching ""