Traceback (most recent call last):
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/packages/urllib3/response.py", line 435, in _update_chunk_length
self.chunk_left = int(line, 16)
ValueError: invalid literal for int() with base 16: b''
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/packages/urllib3/response.py", line 226, in _error_catcher
yield
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/packages/urllib3/response.py", line 486, in read_chunked
self._update_chunk_length()
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/packages/urllib3/response.py", line 439, in _update_chunk_length
raise httplib.IncompleteRead(line)
http.client.IncompleteRead: IncompleteRead(0 bytes read)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/models.py", line 660, in generate
for chunk in self.raw.stream(chunk_size, decode_content=True):
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/packages/urllib3/response.py", line 340, in stream
for line in self.read_chunked(amt, decode_content=decode_content):
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/packages/urllib3/response.py", line 514, in read_chunked
self._original_response.close()
File "/usr/lib/python3.5/contextlib.py", line 77, in __exit__
self.gen.throw(type, value, traceback)
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/packages/urllib3/response.py", line 244, in _error_catcher
raise ProtocolError('Connection broken: %r' % e, e)
requests.packages.urllib3.exceptions.ProtocolError: ('Connection broken: IncompleteRead(0 bytes read)', IncompleteRead(0 bytes read))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "spider.py", line 40, in main
poster.get(callback=lambda posts, forum=forum_name: store_to_db(posts, forum))
File "/home/ubuntu/workspace/dcard-spider/dcard/posts.py", line 51, in get
return PostsResult(self.ids, bundle, callback)
File "/home/ubuntu/workspace/dcard-spider/dcard/posts.py", line 73, in __init__
self.results = self.format(bundle, callback)
File "/home/ubuntu/workspace/dcard-spider/dcard/posts.py", line 97, in format
comments = comments.get() if comments else []
File "/usr/lib/python3.5/multiprocessing/pool.py", line 608, in get
raise self._value
File "/usr/lib/python3.5/multiprocessing/pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "/usr/lib/python3.5/multiprocessing/pool.py", line 44, in mapstar
return list(map(*args))
File "/home/ubuntu/workspace/dcard-spider/dcard/posts.py", line 60, in _serially_get_comments
_comments = client.get(comments_url, params=params)
File "/home/ubuntu/workspace/dcard-spider/dcard/utils.py", line 31, in get
response = self.req_session.get(url, **kwargs)
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/sessions.py", line 480, in get
return self.request('GET', url, **kwargs)
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/sessions.py", line 468, in request
resp = self.send(prep, **send_kwargs)
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/sessions.py", line 608, in send
r.content
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/models.py", line 737, in content
self._content = bytes().join(self.iter_content(CONTENT_CHUNK_SIZE)) or bytes()
File "/home/ubuntu/workspace/dcard/venv/lib/python3.5/site-packages/requests/models.py", line 663, in generate
raise ChunkedEncodingError(e)
requests.exceptions.ChunkedEncodingError: ('Connection broken: IncompleteRead(0 bytes read)', IncompleteRead(0 bytes read))
requests.exceptions.ChunkedEncodingError: ('Connection broken: IncompleteRead(0 bytes read)', IncompleteRead(0 bytes read))
报错原因:服务器使用的是 HTTP/1.0 协议,而 Python 客户端默认使用 HTTP/1.1(会启用分块传输编码 chunked),从上面的堆栈可以看到,客户端在读取分块长度时连接已经断开,读到的是空数据。毕竟咱们无法去改服务器的代码,解决办法就是在客户端指定 HTTP 协议版本。
解决办法(注意:以下设置会影响当前进程内所有 HTTP 连接,需要在发起请求之前执行):
python3
import http.client
http.client.HTTPConnection._http_vsn = 10
http.client.HTTPConnection._http_vsn_str = 'HTTP/1.0'
python2
import httplib
httplib.HTTPConnection._http_vsn = 10
httplib.HTTPConnection._http_vsn_str = 'HTTP/1.0'