# Complete code for downloading the data and storing it (完整的下载数据存储的代码)
import requests
from bs4 import BeautifulSoup
import json
import codecs
import pymysql
import pymongo
class Food(object):
    """Scraper for the price listing pages served by www.gznw.gov.cn.

    Every table row found is written to ``food.json`` and inserted into the
    MySQL table ``food`` (database ``nodejs``) and the MongoDB collection
    ``test.food``.
    """

    def __init__(self):
        # One URL per result page, pages 1-99. The ``{}`` placeholder is
        # essential: without it ``str.format`` returns the template unchanged
        # and every entry would be the same page-less URL.
        self.url = [
            'http://www.gznw.gov.cn/priceInfo/getPriceInfoByAreaId.jx?areaid=22572&page={}'.format(i)
            for i in range(1, 100)
        ]
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36',
        }

    def down_data(self):
        """Download every page, persist each row and return all rows.

        For each ``<tr>`` in the ``<tbody>`` of the ``table table-hover``
        table, the text of cells 0, 1, 3 and 4 is used as name, price,
        address and time. Each row is written to ``food.json`` (an indented
        JSON object followed by ``,`` and a newline), inserted into MySQL and
        MongoDB, and collected in the returned list.

        Pages without the expected table and rows with fewer than five cells
        are skipped instead of aborting the whole crawl.

        :return: list of dicts with the keys ``name``, ``price``, ``address``
            and ``time``.
        """
        food_list = []
        # The context manager guarantees the file is closed even when a
        # request or a database call raises halfway through the crawl.
        with codecs.open('food.json', 'wb', encoding='utf-8') as fb:
            for current_url in self.url:
                response = requests.get(url=current_url, headers=self.headers)
                soup = BeautifulSoup(response.text, 'lxml')
                table = soup.find('table', attrs={'class': 'table table-hover'})
                tbody = table.find('tbody') if table is not None else None
                if tbody is None:
                    # Error page or a page past the last result: nothing to parse.
                    continue
                for tr in tbody.find_all('tr'):
                    tds = tr.find_all('td')
                    if len(tds) < 5:
                        # Header or malformed row; the indexes below need 5 cells.
                        continue
                    name = tds[0].get_text()
                    price = tds[1].get_text()
                    address = tds[3].get_text()
                    row_time = tds[4].get_text()
                    food_dict = {
                        'name': name,
                        'price': price,
                        'address': address,
                        'time': row_time,
                    }
                    # Serialise before the MongoDB insert so the JSON output
                    # only ever contains the four scraped fields.
                    fb.write(json.dumps(food_dict, indent=2, ensure_ascii=False) + ',\n')
                    self.insert_table(name, price, address, row_time)
                    self.insert_mongodb(food_dict)
                    print('---------正在下载中:{0}--------'.format(name))
                    food_list.append(food_dict)
        return food_list

    def insert_table(self, name, price, address, time):
        """Insert one scraped row into the MySQL ``food`` table.

        A connection to the ``nodejs`` database is opened and closed on every
        call.

        :param name: value for the ``name`` column
        :param price: value for the ``price`` column
        :param address: value for the ``address`` column
        :param time: value for the ``time`` column
        :return: ``True`` when the row was committed; ``False`` when the
            insert failed (the error is printed and the transaction is
            rolled back).
        """
        db = pymysql.connect(host='127.0.0.1', user='root', passwd='root', db='nodejs', port=3306,
                             charset='utf8')
        cursor = db.cursor()
        sql = 'insert into food (name, price, address, time) values (%s, %s, %s, %s)'
        try:
            cursor.execute(sql, (name, price, address, time))
            db.commit()
            return True
        except Exception as e:
            # Best effort: one bad row must not stop the crawl. ``Exception``
            # rather than ``BaseException`` so Ctrl-C and SystemExit still
            # propagate.
            db.rollback()
            print(e)
            return False
        finally:
            cursor.close()
            db.close()

    def insert_mongodb(self, data):
        """Insert ``data`` into the ``food`` collection of the ``test`` database.

        :param data: document (dict) to store
        :return: the result object returned by ``insert_one``
        """
        client = pymongo.MongoClient(host='localhost', port=27017)
        try:
            return client.test.food.insert_one(data)
        finally:
            # A client is created per call, so release its connections here.
            client.close()
def create_table():
    """Drop and recreate the MySQL table ``food`` in the ``nodejs`` database.

    WARNING: any existing ``food`` table is dropped first, so all rows stored
    in it are lost.

    A failure of the ``CREATE TABLE`` statement is printed and rolled back
    rather than raised; a failure of the ``DROP TABLE`` statement propagates
    to the caller. The cursor and the connection are closed in every case.
    """
    db = pymysql.connect(host='127.0.0.1', user='root', passwd='root', db='nodejs', port=3306,
                         charset='utf8')
    sql = """
    create table if not exists food(
    id INT NOT NULL AUTO_INCREMENT,
    name VARCHAR(50) ,
    price FLOAT,
    address VARCHAR(100),
    time DATE,
    PRIMARY KEY (`id`)
    )
    """
    try:
        # The cursor context manager closes the cursor on exit.
        with db.cursor() as cursor:
            cursor.execute('DROP TABLE IF EXISTS food')
            try:
                cursor.execute(sql)
                db.commit()
                print('创建表成功')
            except pymysql.Error as e:
                db.rollback()
                print(e)
    finally:
        # Runs even when the DROP statement raises, so the connection never leaks.
        db.close()
if __name__ == "__main__":
    # Script entry point: build the scraper and run one full crawl.
    crawler = Food()
    crawler.down_data()