在学习 Python 爬虫时,准备把爬到的数据写入 JSON 文件,结果程序报错了。
原代码
#coding:utf-8
import json
from bs4 import BeautifulSoup
import requests
user_agent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:55.0) Gecko/20100101 Firefox/55.0'
headers = {'User-Agent':user_agent}
r = requests.get('http://seputu.com',headers=headers)
soup = BeautifulSoup(r.text, 'html.parser',from_encoding='utf-8')
content = []
for mulu in soup.find_all(class_="mulu"):
    h2 = mulu.find('h2')
    if h2 != None:
        h2_title = h2.string
        list = []
        for a in mulu.find(class_="box").find_all('a'):
            href = a.get('href')
            box_title = a.get('title')
            list.append({'href':href,'bot_title':box_title})
        content.append({'title':h2_title,'content':list})
with open('qiye.json','wb') as fp:
    json.dump(content,fp=fp,indent=4,ensure_ascii=False)
报错:
/usr/bin/python2.7 /home/repeat/PycharmProjects/untitled/qiye.py
/home/repeat/.local/lib/python2.7/site-packages/bs4/__init__.py:146: UserWarning: You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.
warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
Traceback (most recent call last):
File "/home/repeat/PycharmProjects/untitled/qiye.py", line 24, in <module>
json.dump(content,fp=fp,indent=4,ensure_ascii=False)
File "/usr/lib/python2.7/json/__init__.py", line 190, in dump
fp.write(chunk)
UnicodeEncodeError: 'ascii' codec can't encode characters in position 20-23: ordinal not in range(128)
Process finished with exit code 1
解决办法:
把这段代码
with open('qiye.json','wb') as fp:
json.dump(content,fp=fp,indent=4,ensure_ascii=False)
改成(注意:需要先在文件开头加上 import codecs)
with codecs.open('qiye.json','wb','utf-8') as fp:
json.dump(content,fp=fp,indent=4,ensure_ascii=False)
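即可。原因:在 Python 2 中,json.dump 使用 ensure_ascii=False 时会向文件写入 unicode 字符串,而内置 open 返回的文件对象在写入 unicode 时会用默认的 ascii 编码做隐式转换,遇到中文等非 ASCII 字符就会抛出 UnicodeEncodeError;codecs.open 指定 'utf-8' 后,写入的 unicode 字符串会自动按 UTF-8 编码,因此不再报错。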