【Python网络爬虫与信息提取-b站北京理工大学视频】.MOOC. 北京理工大学 p47股票个股信息爬取失败【请教大神】

Prunella ·

更新时间:2024-11-13

· 802 次阅读

视频链接：https://www.bilibili.com/video/BV1kx411S7Fh?p=47
跟着老师做的，但是老师讲的百度股票已经没有了，所以用雪球网替代了，不过没有输出，也没有报错，请大神帮忙看看什么问题，谢谢！

import re
import requests
from bs4 import BeautifulSoup
def getHTMLText(url,header):
    """Fetch ``url`` and return the response body as text.

    Args:
        url: Address to request.
        header: Dict of HTTP request headers handed to ``requests.get``.

    Returns:
        The page text, or an empty string when the request fails
        (connection problem, timeout, or an HTTP error status).
    """
    try:
        r = requests.get(url,headers=header,timeout=30)
        r.raise_for_status()
        # Decode with the encoding detected from the body content.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # The original used `continue` here, which is a SyntaxError outside a
        # loop. Return the empty-string sentinel that callers test for.
        return ""
def getStockList(lst,stock_list_url,header=None):
    """Collect stock codes from the links on a listing page.

    Every ``<a>`` tag on the page is inspected; when its ``href`` contains
    ``sh`` or ``sz`` followed by six digits, that code is appended to ``lst``.

    Args:
        lst: List that receives the codes (mutated in place).
        stock_list_url: URL of the page that links to the individual stocks.
        header: Optional dict of HTTP request headers. When omitted, a
            module-level ``head1`` is used if one exists, otherwise a minimal
            User-Agent header.

    Returns:
        None. Results are delivered through ``lst``; nothing is appended when
        the page cannot be fetched.
    """
    if header is None:
        # `head1` used to be referenced here as a bare name, but it is only
        # defined inside main(), so look it up defensively.
        header = globals().get('head1') or {'User-Agent': 'Mozilla/5.0'}
    html = getHTMLText(stock_list_url,header)
    if not html:
        return
    soup = BeautifulSoup(html,"html.parser")
    for anchor in soup.find_all("a"):
        href = anchor.get('href')
        if not href:
            continue
        # Equivalent to re.findall(...)[0]: keep the first code in the link.
        match = re.search(r"[s][hz]\d{6}",href)
        if match:
            lst.append(match.group(0))
def getStockInfo(lst,stock_info_url,file,header=None):
    """Fetch each stock's detail page and append its data to ``file``.

    For every code in ``lst`` the page ``stock_info_url + code`` is fetched,
    the stock name and the table cells of the main info block are gathered
    into a dict, and ``str(dict)`` is appended to ``file`` as one line.

    Args:
        lst: Iterable of stock code strings.
        stock_info_url: URL prefix; the stock code is appended to it.
        file: Path of the output text file (opened in append mode, UTF-8).
        header: Optional dict of HTTP request headers. When omitted, a
            module-level ``head2`` is used if one exists, otherwise a minimal
            User-Agent header.

    Returns:
        None. Stocks whose page cannot be fetched, or whose page lacks the
        expected info block or name, are skipped.
    """
    if header is None:
        # `head2` is only defined inside main(), so look it up defensively.
        header = globals().get('head2') or {'User-Agent': 'Mozilla/5.0'}
    for stock in lst:
        # NOTE(review): codes arrive lower-case (e.g. "sh600000"); confirm
        # the target site accepts that form in its URLs.
        url = stock_info_url + stock
        html = getHTMLText(url,header)
        if not html:
            continue
        soup = BeautifulSoup(html,"html.parser")
        stockinfo = soup.find('div',attrs={"class":"container-sm float-left stock__main"})
        if stockinfo is None:
            # Expected container is missing from the returned markup.
            continue
        name = stockinfo.find('div',attrs={"class":"stock-name"})
        name_parts = name.text.split() if name is not None else []
        if not name_parts:
            continue
        infodict = {'stock_name':name_parts[0]}
        # The original indexed an undefined `keyvalue` list, and the bare
        # `except` hid the NameError, so nothing was ever written.
        for cell in stockinfo.find_all('td'):
            text = cell.text.strip()
            # Assumes each cell reads "label：value" (full-width colon, with an
            # ASCII colon as fallback) -- TODO confirm against the live page.
            # The original's text[0] / text[-1] took single characters only.
            separator = '：' if '：' in text else ':'
            key, found, value = text.partition(separator)
            key = key.strip()
            if found and key:
                infodict[key] = value.strip()
        # Write one line per stock, after all cells have been collected.
        with open(file,'a',encoding='utf-8') as f:
            f.write(str(infodict)+'\n')
def main():
    """Run the crawl: collect stock codes, then save each stock's details.

    Stock codes are read from the listing page, and each stock's details are
    fetched from the detail-page prefix and appended to ``snowstock.txt``.
    """
    # getStockList() and getStockInfo() look these header dicts up by their
    # module-level names, so they must be globals; as plain locals of main()
    # they were invisible to those functions (NameError).
    global head1, head2
    stock_list_url = 'http://quote.eastmoney.com/stock_list.html#sh'
    stock_info_url = 'https://xueqiu.com/S/'
    head1 = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1'}
    head2 = {
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
        'Sec-Fetch-Dest': 'document',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-User': '?1',
        'Referer': 'https://xueqiu.com/',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }
    output_file = 'snowstock.txt'
    lst = []
    getStockList(lst,stock_list_url)
    getStockInfo(lst,stock_info_url,output_file)
# Start the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

作者：celine11111

北京理工大学爬虫 p4 股票大学 Python

1024 个赞