在线观看不卡亚洲电影_亚洲妓女99综合网_91青青青亚洲娱乐在线观看_日韩无码高清综合久久

鍍金池/ 問答/Python/ 寫入txt文件時出錯

寫入txt文件時出錯

# -*- conding:utf-8 -*-

import requests,re

# List-page URLs to crawl: the page number is substituted into the template
# for every value in range(1, 26), i.e. pages 1 through 25.
url_list_all = ['http://finance.eastmoney.com/news/cgnjj_{}.html'.format(pages) for pages in range(1,26)]


def content(url):
    """Download one article page and extract its title, time, source and body.

    The extracted fields are collected in the local dict ``content_content``
    and printed. The function has no ``return`` statement, so callers
    receive ``None``.
    """
    req = requests.get(url,timeout = 500)
    print('content_ulr:',url)
    # Force UTF-8 decoding of the response body before reading req.text.
    req.encoding = 'utf-8'
    content_source = req.text
    # Each re.search(...) returns None when its pattern is not found, in which
    # case the chained .group(1) raises AttributeError.
    title = re.search('<h1>(.*?)</h1>',content_source,re.S).group(1)
    source_time = re.search('<div class="time">(.*?)</div>', content_source, re.S).group(1)
    source = re.search('<span>來源:</span>(.*?)</div>', content_source, re.S).group(1)
    #description = re.search('<div class="abstract">摘要</div>(.*?)</div>', content_source, re.S).group(1)
    # NOTE(review): "(zé)" inside this pattern is a second capture group that
    # must match the literal text "zé"; it looks like an artifact of how this
    # listing was copied -- confirm the pattern against the real page markup.
    content_body = re.search('<!--文章主體-->(.*?)<!--責(zé)任編輯-->', content_source, re.S).group(1)
    content_content = {'title:': title, 'time:': source_time, 'source:': source, 'content:': content_body}
    # NOTE(review): content_content is local to this function and is only
    # printed, never returned, so code outside content() cannot see it.
    print(content_content)
    # Disabled debug output, one line per extracted field:
    # print('標(biāo)題:',title)
    # print('時(shí)間:',source_time)
    # print('來源:',source)
    # #print('摘要:',description)
    # print('內(nèi)容:',content_body)

def file_save(content):
    """Append ``content`` followed by a newline to east.txt on the Desktop.

    The parameter name ``content`` shadows the module-level function of the
    same name inside this function body.
    """
    # No encoding is passed, so the platform's default text encoding is used.
    with open('C:\\Users\\Administrator\\Desktop\\east.txt','a') as f:
        # content + '\n' only works when content is a str; passing a dict
        # raises TypeError here.
        f.write(content + '\n')
        # Redundant: the with-statement already closes the file on exit.
        f.close()
# Crawl every list page, collect the article links on it, then fetch and save
# each article.
for url in url_list_all:
    print('url_list:',url)
    req = requests.get(url)
    req.encoding = 'utf-8'
    list_content = req.text
    # Capture the href of the first <a ... target="_blank"> that follows each
    # <p class="title"> element.
    content_url = re.findall('<p class="title">.*?<a href="(.*?)" target="_blank">',list_content,re.S)
    #print(content_url)
    #pic = re.search('<div class="image">.*?<a.*?"><img src="(.*?)" />.*?</a>',list_content,re.S).group(1)
    # The inner loop reuses the name `url`, rebinding the outer loop variable.
    for url in content_url:
        # The value produced by content(url) is discarded here.
        content(url)
        # NOTE(review): no module-level name content_content is assigned in
        # the code shown, so this lookup raises NameError.
        file_save(content_content)

以上,代碼比較混亂...如果只是正常輸出的話,沒問題,但是調用函數保存的時候就不行。試了幾種方法,報錯類型大概有三種:1、字典不能和字符串相加 2、參數必須是字符串 3、找不到content_content 請求幫忙看下

回答
編輯回答
失魂人

你的代碼有幾個地方有問題,按照下面的代碼再試試:

# -*- coding: utf-8 -*-
import requests
import re
import json


# Only the first two list pages are generated so that a test run finishes quickly.
url_list_all = list(map('http://finance.eastmoney.com/news/cgnjj_{}.html'.format, range(1, 3)))


def content(url):
    """Fetch one article page and extract its main fields.

    Args:
        url: Address of the article page to download.

    Returns:
        A dict with the keys ``'title:'``, ``'time:'``, ``'source:'`` and
        ``'content:'`` (the trailing colon is part of each key). A field
        whose pattern is not found in the page is returned as ``''``
        instead of raising.

    Raises:
        requests.RequestException: If the request fails or times out, or the
            server answers with an HTTP error status.
    """
    # A bounded timeout keeps one unresponsive server from stalling the crawl
    # for many minutes.
    req = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    req.raise_for_status()
    print('content_ulr:', url)
    req.encoding = 'utf-8'
    content_source = req.text

    # NOTE(review): the non-ASCII text in the last two patterns looks altered
    # by how this listing was copied ("(zé)" is a second capture group that
    # must match the literal text "zé") -- confirm against the page markup.
    patterns = {
        'title:': '<h1>(.*?)</h1>',
        'time:': '<div class="time">(.*?)</div>',
        'source:': '<span>來源:</span>(.*?)</div>',
        'content:': '<!--文章主體-->(.*?)<!--責(zé)任編輯-->',
    }

    content_content = {}
    for key, pattern in patterns.items():
        # re.search returns None when the marker is absent; guard before
        # calling .group(1) so a page with a missing field is still usable.
        match = re.search(pattern, content_source, re.S)
        content_content[key] = match.group(1) if match else ''
    return content_content


def file_save(content, path='./east.txt'):
    """Append one record to a UTF-8 text file, one record per line.

    Args:
        content: The record to store. A ``str`` is written unchanged; any
            other value (for example a dict) is serialised with
            ``json.dumps`` so the line can be parsed back later.
        path: File to append to. Defaults to ``./east.txt``.

    Raises:
        TypeError: If ``content`` is not a ``str`` and is not JSON
            serialisable.
        OSError: If the file cannot be opened or written.
    """
    if isinstance(content, str):
        line = content
    else:
        # ensure_ascii=False keeps non-ASCII text readable in the file
        # instead of turning it into \uXXXX escapes.
        line = json.dumps(content, ensure_ascii=False)

    # An explicit encoding avoids UnicodeEncodeError on systems whose default
    # text encoding cannot represent every character in the record.
    with open(path, 'a', encoding='utf-8') as f:
        # The trailing newline separates consecutive records.
        f.write(line + '\n')

# Crawl every list page, collect the article links on it, then fetch and save
# each article. A page or article that cannot be downloaded or parsed is
# reported and skipped so that one failure does not abort the whole crawl.
for url in url_list_all:
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        req = requests.get(url, timeout=30)
    except requests.RequestException as err:
        print('skipped:', url, err)
        continue
    req.encoding = 'utf-8'
    list_content = req.text
    # Capture the href of the first <a ... target="_blank"> that follows each
    # <p class="title"> element.
    content_url = re.findall('<p class="title">.*?<a href="(.*?)" target="_blank">', list_content, re.S)

    for u in content_url:
        try:
            cnt = content(u)
        except (requests.RequestException, AttributeError) as err:
            # AttributeError covers an article page that lacks an expected
            # marker (re.search(...) returned None before .group(1)).
            print('skipped:', u, err)
            continue
        file_save(cnt)
        
    
2017年3月24日 04:08