完整的下载数据存储的代码

import requests
from bs4 import BeautifulSoup
import json
import codecs
import pymysql
import pymongo

class Food(object):
    def __init__(self):
        self.url = ['http://www.gznw.gov.cn/priceInfo/getPriceInfoByAreaId.jx?areaid=22572&page='.format(str(i)) for i
                    in range(1, 100)]
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36',
        }

    def down_data(self):
        """
        下载数据
        :return:
        """
        food_list = []
        # 使用codecs的方法打开
        fb = codecs.open('food.json', 'wb', encoding='utf-8')
        for current_url in self.url:
            response = requests.get(url=current_url, headers=self.headers)
            soup = BeautifulSoup(response.text, 'lxml')
            table = soup.find('table', attrs={'class': 'table table-hover'})
            trs = table.find('tbody').find_all('tr')

            for tr in trs:
                food_dict = {}
                tds = tr.find_all('td')
                name = tds[0].get_text()
                price = tds[1].get_text()
                address = tds[3].get_text()
                time = tds[4].get_text()

                food_dict['name'] = name
                food_dict['price'] = price
                food_dict['address'] = address
                food_dict['time'] = time
                # 写入到本地文件中
                fb.write(json.dumps(food_dict, indent=2, ensure_ascii=False) + ',\n')

                # 写入到mysql数据库中
                self.insert_table(name, price, address, time)
                # 写入到mongod数据库中
                self.insert_mongodb(food_dict)
                print('---------正在下载中:{0}--------'.format(name))
                food_list.append(food_dict)
        fb.close()
        return food_list

    def insert_table(self, name, price, address, time):
        """
        对爬取的数据插入到数据库中
        :param price:
        :param address:
        :param time:
        :return:
        """
        # 创建数据库连接
        db = pymysql.connect(host='127.0.0.1', user='root', passwd='root', db='nodejs', port=3306,
                             charset='utf8')

        # 获取数据库句柄
        cursor = db.cursor()

        # 插入数据的sql语句
        sql = 'insert into food (name, price, address, time) values (%s, %s, %s, %s)'
        try:
            # 执行 sql 语句
            value = (name, price, address, time)
            cursor.execute(sql, value)
            # 提交事务
            db.commit()
            return True
        except BaseException as e:  # 如果发生错误则回滚
            db.rollback()
            print(e)
        finally:  # 关闭游标连接
            cursor.close()
            # 关闭数据库连接
            db.close()

    def insert_mongodb(self, data):
        # 创建连接
        client = pymongo.MongoClient(host='localhost', port=27017)
        # 指定连接的数据库
        db = client.test
        # 指定的集合(类似表)
        collection = db.food
        result = collection.insert_one(data)
        return result


def create_table():
    # 创建数据库连接
    db = pymysql.connect(host='127.0.0.1', user='root', passwd='root', db='nodejs', port=3306,
                         charset='utf8')

    # 获取数据库句柄
    cursor = db.cursor()

    cursor.execute('DROP TABLE IF EXISTS food')
    # 创建article表的sql语句
    sql = """
          create table if not exists food(
            id INT NOT NULL AUTO_INCREMENT,
            name VARCHAR(50) ,
            price FLOAT,
            address VARCHAR(100),
            time DATE,
            PRIMARY KEY (`id`)
          )
      """

    try:
        # 执行sql语句
        cursor.execute(sql)
        # 提交事务
        db.commit()
        print('创建表成功')
    except pymysql.Error as e:
        # 数据回滚
        db.rollback()
        print(e)
    finally:
        if db:
            db.close()

if __name__ == "__main__":
    foo = Food()
    foo.down_data()
    # create_table()

results matching ""

    No results matching ""