&

分類:

推薦最新等你回答

誮惜顏回答

conda install mod_wsgi
pip install mod_wsgi
2選1

赱丅呿回答

為什麼要弄兩個spider呢？你完完全全可以在第一個spider下再寫一個parse_shuping_two啊。

對你的代碼稍作了修改，可以達到你的要求（別忘了在settings.py中加上ITEM_PIPELINES = {'ysw.pipelines.YswPipeline': 300}以激活pipeline）：
spiders/shuping.py

# -*- coding: utf-8 -*-
import scrapy
from scrapy.http import Request
from ysw.items import YswItem, YswItems
import json
from scrapy import Selector
import re


class ShupingSpider(scrapy.Spider):
    """Scrape first-level book reviews and their replies from yousuu.com.

    ``parse`` handles the first page of first-level reviews. A review that
    reports replies is handed to ``parse_shuping_two``, which follows the
    reply pagination and emits a single ``YswItems`` once every reply page
    has been collected. Reviews without replies are emitted directly as
    ``YswItem``.
    """

    name = 'shuping'
    #allowed_domains = ['www.yousuu.com']
    start_urls = ['http://www.yousuu.com/book/124600']

    # Patterns applied to the onclick handler of the "more replies" link.
    # They are raw strings on purpose: a non-raw "\)" is an invalid escape
    # sequence and triggers a warning on current Python versions.
    _CID_RE = re.compile(r"(.*?)'(.*?)',(.*)")
    _T_RE = re.compile(r"(.*),(.*?)\)")

    def parse(self, response):
        """Extract every first-level review found on the first review page.

        Yields a ``YswItem`` for a review without replies, a ``Request`` for
        the first reply page of a review that has replies, or a ``YswItems``
        with an empty reply list when the replies cannot be located.
        """
        for r in response.xpath('//*[@id="content"]/div'):
            item = YswItem()

            # Posting time
            item['time'] = r.xpath('string(./div/div/div[1]/div/span[2])').extract_first().strip()

            # Up-vote count; an empty label is stored as '0'
            agree = r.xpath('string(./div/div/div[2]/button[1]/span)').extract_first().strip()
            item['agree'] = agree or '0'

            # Text of the first-level review, with CRLFs and spaces removed
            item['fir_text'] = r.xpath('string(./div/div/p)').extract_first().replace('\r\n', '').replace(' ', '')

            # Number of replies (second-level comments)
            sec_num = r.xpath('string(./div/div/div[2]/button[2]/span)').extract_first().strip()
            if not sec_num:
                item['sec_num'] = '0'
                yield item
                continue
            item['sec_num'] = sec_num

            # The cid attribute identifies the review in the reply endpoint.
            # extract_first() returns None when the attribute is missing, so
            # guard before stripping instead of aborting the whole page.
            cid = (r.xpath('./@cid').extract_first() or '').strip()
            if not cid:
                self.logger.warning(
                    'Review on %s reports replies but has no cid; replies skipped',
                    response.url)
                yield self._with_replies(item, [])
                continue

            # URL of the first reply page for this review
            sec_text_url = "http://www.yousuu.com/ajax/getonecomment?render=true&cid={}".format(cid)

            # A fresh list per review accumulates its replies across pages
            yield Request(sec_text_url,
                          meta={'sec_text_list': [], 'item': item},
                          callback=self.parse_shuping_two)
        print('一級(jí)書評(píng)第一頁!')

    def parse_shuping_two(self, response):
        """Collect one page of replies and follow the pagination.

        Reads ``sec_text_list`` (replies gathered so far) and ``item`` (the
        first-level review) from ``response.meta``. Yields a ``Request`` for
        the next reply page while one exists, otherwise the finished
        ``YswItems``.
        """
        # The endpoint answers with JSON whose 'html' member is the reply markup
        html = json.loads(response.body)['html']
        selector = Selector(text=html)

        # Text of every <p> on this reply page
        result = selector.xpath('//p/text()').extract()

        sec_text_list = response.meta['sec_text_list']
        item = response.meta['item']

        # The first entry of each page repeats the first-level review itself,
        # so only the entries after it are new replies.
        sec_text_list.extend(result[1:])

        # NOTE(review): the link text in this XPath looks mangled by a text
        # conversion (stray "(fù)"); verify it against the live page markup.
        nextpage = selector.xpath('//a[text()="更多回復(fù)"]/@onclick').extract_first()
        if nextpage:
            next_page_url = self._next_reply_url(nextpage)
            if next_page_url:
                # Re-enter this callback for the next page of replies
                yield Request(next_page_url,
                              meta={'sec_text_list': sec_text_list, 'item': item},
                              callback=self.parse_shuping_two)
                return
            # Unknown handler format: keep what was collected instead of
            # losing the review to an AttributeError.
            self.logger.warning('Unrecognised pagination handler %r on %s',
                                nextpage, response.url)
        else:
            print('已獲取此一級(jí)書評(píng)的全部二級(jí)書評(píng)！')

        yield self._with_replies(item, sec_text_list)

    def _next_reply_url(self, onclick):
        """Build the next reply-page URL from an onclick handler, or return None."""
        cid_match = self._CID_RE.search(onclick)
        t_match = self._T_RE.search(onclick)
        if not (cid_match and t_match):
            return None
        return "http://www.yousuu.com/ajax/getcommentreply?cid={}&t={}&render=true".format(
            cid_match.group(2), t_match.group(2))

    @staticmethod
    def _with_replies(item, sec_text_list):
        """Copy a first-level item into a ``YswItems`` and attach its replies."""
        items = YswItems()
        items['sec_text'] = sec_text_list
        for field in ('time', 'agree', 'sec_num', 'fir_text'):
            items[field] = item[field]
        return items

pipelines.py

# -*- coding: utf-8 -*-
import os

class YswPipeline(object):
    """Item pipeline that appends each review to ``SP.txt`` in the working directory."""

    def process_item(self, item, spider):
        """Append *item* to the output file as a labelled text record.

        The record always holds the time, up-vote count, reply count and
        first-level text; the replies are added on an extra line when
        ``item['sec_num']`` is not ``'0'``. Returns *item* unchanged.
        """
        file_name = os.getcwd() + '/SP.txt'

        with open(file_name, 'a', encoding='utf-8') as f:
            has_replies = item['sec_num'] != '0'
            record = [
                '時(shí)間：' + item['time'],
                '贊同數(shù)：' + item['agree'],
                '二級(jí)評(píng)論數(shù)量：' + item['sec_num'],
                '一級(jí)評(píng)論內(nèi)容：' + item['fir_text'],
            ]
            if has_replies:
                record.append('二級(jí)評(píng)論內(nèi)容：' + '\n'.join(item['sec_text']))
            # A blank line separates consecutive records
            f.write('\n'.join(record) + '\n\n')
        return item

ftp如何做到精確限速？

孤島回答

對比了一下scp，可以做到更精細的限速（使用了nanosleep），但由於集群上用了kerberos做認證，ssh比較麻煩，不知道有沒有人了解過其他ftp的框架或開源產品，可以做到nanosec級限速（不限語言）？要是沒有就只能改改開源產品的源碼了，以下是scp的限速源碼片段：

/*
 * Throttle a transfer: add read_len to the running amount and, once at
 * least bw->thresh has accumulated, sleep if that amount was moved in less
 * time than bw->rate allows.
 *
 * bw       - limiter state (bwstart/bwend timestamps, lamt accumulator,
 *            thresh, rate, buflen); updated in place.
 * read_len - amount just transferred; added to bw->lamt.
 *
 * NOTE(review): the "*= 8" below suggests read_len is counted in bytes and
 * bw->rate in bits per second - confirm against the code that fills in bw.
 */
void
bandwidth_limit(struct bwlimit *bw, size_t read_len)
{
    u_int64_t waitlen;
    struct timespec ts, rm;

    /* First call: no start time recorded yet, so only start the clock. */
    if (!timerisset(&bw->bwstart)) {
        gettimeofday(&bw->bwstart, NULL);
        return;
    }

    /* Accumulate; nothing else happens until thresh has been reached. */
    bw->lamt += read_len;
    if (bw->lamt < bw->thresh)
        return;

    /* bwend becomes the time elapsed since bwstart. */
    gettimeofday(&bw->bwend, NULL);
    timersub(&bw->bwend, &bw->bwstart, &bw->bwend);
    /* Zero elapsed time: return and keep accumulating (lamt is not reset). */
    if (!timerisset(&bw->bwend))
        return;

    /* waitlen: microseconds the accumulated amount should take at bw->rate. */
    bw->lamt *= 8;
    waitlen = (double)1000000L * bw->lamt / bw->rate;

    /* bwstart is reused as scratch to hold that expected duration. */
    bw->bwstart.tv_sec = waitlen / 1000000L;
    bw->bwstart.tv_usec = waitlen % 1000000L;

    /* Expected duration exceeds the elapsed time: sleep for the difference. */
    if (timercmp(&bw->bwstart, &bw->bwend, >)) {
        /* bwend becomes the time still to wait. */
        timersub(&bw->bwstart, &bw->bwend, &bw->bwend);

        /*
         * Adjust the wait time: a wait of a second or more halves thresh
         * (not below buflen / 4); a wait under 10000 usec doubles it
         * (not above buflen * 8).
         */
        if (bw->bwend.tv_sec) {
            bw->thresh /= 2;
            if (bw->thresh < bw->buflen / 4)
                bw->thresh = bw->buflen / 4;
        } else if (bw->bwend.tv_usec < 10000) {
            bw->thresh *= 2;
            if (bw->thresh > bw->buflen * 8)
                bw->thresh = bw->buflen * 8;
        }

        /* Sleep; when interrupted (EINTR) resume with the remaining time. */
        TIMEVAL_TO_TIMESPEC(&bw->bwend, &ts);
        while (nanosleep(&ts, &rm) == -1) {
            if (errno != EINTR)
                break;
            ts = rm;
        }
    }

    /* Start a new measurement window. */
    bw->lamt = 0;
    gettimeofday(&bw->bwstart, NULL);
}

最終使用了vsftpd，看過源碼用的是nanosleep

谷歌瀏覽器input自動填充的黃色背景樣式覆蓋不了

青黛色回答

:-webkit-autofill {
  -webkit-text-fill-color: #fff !important;
  /* Stretch the background-color transition over 5000s so the browser's
     autofill background never visibly replaces the field's own background.
     Workaround: overriding the autofill background directly did not work. */
  transition: background-color 5000s ease-in-out 0s;
}

可以設置xxx秒之後變色；之前嘗試直接修改默認樣式，但是失敗了，用這個替代方案吧。

如何重構正在快速迭代的項目？

笨笨噠回答

1. 開分支
2. 按模塊做重構
3. 測試
4. 合併

重複步驟 2-3

在mac 中 python 中的scrapy 安裝完成，但是pip list 找不到

墨染殤回答

我推測你的環境中應該同時擁有 python3 和 python2，現在版本的 pip 將直接指向 python3，而 python2 需要使用 pip2。
在我的環(huán)境中:

$ pip list
Package    Version
---------- -------
pip        10.0.1
setuptools 20.10.1

$ pip2 list
Package    Version
---------- -------
pip        10.0.1
setuptools 20.10.1

$ sudo pip install six
$ pip list
Package    Version
---------- -------
pip        10.0.1
setuptools 20.10.1
$ pip2 list
Package    Version
---------- -------
pip        10.0.1
setuptools 20.10.1
six        1.11.0

如果這樣安裝的話:

$ pip install six --user
$ pip list
Package    Version
---------- -------
pip        10.0.1
setuptools 20.10.1
six        1.11.0
$ pip2 list
Package    Version
---------- -------
pip        10.0.1
setuptools 20.10.1

綜上，你使用 sudo pip install scrapy 實際上是把 scrapy 安裝到你的 python2 環境中去了，而 pip 命令顯示的是 python3 環境中的包，這時候沒有 scrapy。
解決方法:
使用 pip install scrapy --user 將 scrapy 安裝到 python3 環境中（只能在 python3 中使用）。
或者用 pip2 list 查看 python2 環境中的包（只能在 python2 中使用）。
使用 sudo 的方式安裝 package 是不推薦的. see[What are the risks of running 'sudo pip'?
](https://stackoverflow.com/que...

不理解shallowcopy用意何在？

練命回答

1. 對象的拷貝方法：數字類型拷貝的是數值，其他類型全部拷貝的是地址（引用）。你的兩個對象裡面的 ["a","b"] 指向同一塊內存，所以修改引用類型的值，其他引用到它的對象都會跟著改變。

2.python數(shù)據(jù)類型如下,包括自定義類:

Number（數(shù)字）
    int（整型）
    float（浮點(diǎn)型）
    complex（復(fù)數(shù)）
    bool（布爾）
String（字符串）
List（列表）
Tuple（元組）
Sets（集合）
Dictionary（字典）
自定義類

3.其他的你的例子中都有,自定義類的例子如下:

    # Demonstrates that list.copy() is shallow: the two lists are distinct
    # objects, but they hold references to the same Person instance.
    class Person:
        def __init__(self,name):
            self.name = name
        def __repr__(self):
            # Show the bare name so the printed lists stay readable
            return self.name
    lst0 = [1,Person("小明"), 5]
    # New list object, same element references
    lst1 = lst0.copy()
    print(lst0 ==lst1,lst1,lst0,id(lst1),id(lst0))
    # Mutating the shared Person through lst0 is visible through lst1 as well
    lst0[1].name="小云"
    print(lst0 ==lst1,lst1,lst0,id(lst1),id(lst0))

結(jié)果:
True [1, 小明, 5] [1, 小明, 5] 66244280 66311632
True [1, 小云, 5] [1, 小云, 5] 66244280 66311632
和你的一樣，修改引用類型會修改所有指向這個地址的對象。

4. 字符串和元組不支持修改（它們是不可變類型）

關於mock.patch()和mock.patch.object()的區別的問題

舊時(shí)光回答

python mock patch 的使用

flask中複雜查詢對象如何封裝成json數據？

糖果果回答

我的做法是生成一個字典來保存數據庫的對象，最後 return json.dumps(這個字典)。

如何讓列表內(nèi)的元素前后相加生成新的元素？

風(fēng)清揚(yáng) 回答

測(cè)資:

data = [1, 2, 3, 4]

代碼:

from itertools import accumulate

# Neighbour sums: every element plus the one before it; the first element
# has no predecessor, so 0 is added to it.
data1 = []
for i, v in enumerate(data):
    before = data[i - 1] if i else 0
    data1.append(v + before)

# Running totals
data2 = list(accumulate(data))

print(data1, data2, sep='\n')

結(jié)果:

[1, 3, 5, 7]
[1, 3, 6, 10]

我回答過的問題: Python-QA

js 多層嵌套的數(shù)組，取出最后一層的數(shù)據(jù)?

久舊酒回答

你是想要獲取到所有的 children 下面的 id？

/**
 * Collect the `id` of every entry in the `children` array of each
 * top-level element.
 *
 * The earlier version could not run: `arr[] = ...` is not JavaScript, the
 * inner loop advanced `i` instead of its own counter, and only
 * `children[0]` was looked at.
 *
 * NOTE(review): assumes each child is an object carrying an `id` property,
 * as the original loop implied - confirm against the real data.
 *
 * @param {Array<Object>} data Top-level entries; each may have a `children` array.
 * @returns {Array} The ids of all children in encounter order. Entries
 *     without a `children` array contribute nothing.
 */
function funEach(data){
    var arr = [];                          // collected ids
    for(var i = 0;i < data.length;i++){
        var children = data[i] && data[i]['children'];
        /* Skip entries that have no children array */
        if(!Array.isArray(children)){
            continue;
        }
        for(var iOne = 0;iOne < children.length;iOne++){
            arr.push(children[iOne]['id']);
        }
    }
    return arr;
}

嗯，按我的邏輯來想的話應該是沒問題的。

在 python3下出現(xiàn)'ascii' codec can't encode characters

毀了心回答

我遇到過一次中文路徑的問題，是通過在uwsgi.ini配置中修改Linux的語言環境解決的

[uwsgi]
...
env=DJANGO_SETTINGS_MODULE=mysite.settings
# Set a UTF-8 locale to fix the encoding errors with Chinese text.
# Kept on its own line: a trailing "# ..." after a value is likely read as
# part of the value rather than as a comment.
env=LC_ALL=zh_CN.UTF-8

可以參考一下

iframe根據(jù)內(nèi)容變化調(diào)整高度

純妹回答

onload="this.height=rightFrame.document.body.scrollHeight;"不知道能不能解決你的問題

爬取馬蜂窩首頁的熱門游記遇到分頁請求參數的問題？

野橘回答

152.. 那個是時間戳，精確到毫秒的。
callback是jsonp跨域請求自動生成的。你需要偽裝成一個jsonp的請求。
或者你不帶callback試試，因為跨域限制只存在於瀏覽器。試一下它後端是否支持普通的API請求。

python2.7 list轉成元組處理方法

好難瘦回答

# One dict per host, mapping an interface role to its address string
l = [{'storage1': '0000:05:00.1', 'storage0': '0000:05:00.0', 'data1': '0000:04:00.1', 'control1': '0000:02:00.1',
      'control0': '0000:02:00.0', 'data0': '0000:04:00.0'},
     {'storage1': '0000:03:00.1', 'storage0': '0000:03:00.0', 'data1': '0000:06:00.1', 'control1': '0000:81:00.1',
      'control0': '0000:81:00.0', 'data0': '0000:06:00.0'},
     {'storage1': '0000:03:00.1', 'storage0': '0000:03:00.0', 'data1': '0000:06:00.1', 'control1': '0000:81:00.1',
      'control0': '0000:81:00.0', 'data0': '0000:06:00.0'},
     {'storage1': '0000:04:00.1', 'storage0': '0000:04:00.0', 'data1': '0000:81:00.1', 'control1': '0000:01:00.1',
      'control0': '0000:01:00.0', 'data0': '0000:81:00.0'},
     {'storage1': '0000:08:00.1', 'storage0': '0000:08:00.0', 'data1': '0000:05:00.1', 'control1': '0000:02:00.1',
      'control0': '0000:02:00.0', 'data0': '0000:05:00.0'}]

# Every address whose key mentions 'storage', in iteration order
all_poi = [addr for nic in l for role, addr in nic.items() if 'storage' in role]

# Each address is paired with itself, giving one 2-tuple per address
d = {'nic_list': [(addr, addr) for addr in all_poi]}

print(d)

運(yùn)行結(jié)果 {'nic_list': [('0000:05:00.1', '0000:05:00.1'), ('0000:05:00.0', '0000:05:00.0'), ('0000:03:00.1', '0000:03:00.1'), ('0000:03:00.0', '0000:03:00.0'), ('0000:03:00.1', '0000:03:00.1'), ('0000:03:00.0', '0000:03:00.0'), ('0000:04:00.1', '0000:04:00.1'), ('0000:04:00.0', '0000:04:00.0'), ('0000:08:00.1', '0000:08:00.1'), ('0000:08:00.0', '0000:08:00.0')]}

python爬蟲json解析的問題

陪我終回答

Quite simple:

>>> print '"Hello,\\nworld!"'.decode('string_escape')
"Hello,
world!"

>>> data = json.loads('{\"count\":8,\"sub_images\":[{\"url\":\"http:\\/\\/p3.pstatp.com\\/origin\\/470700000c7084773fb2\",\"width\":1178,\"url_list\":[{\"url\":\"http:\\/\\/p3.pstatp.com\\/origin\\/470700000c7084773fb2\"},{\"url\":\"http:\\/\\/pb9.pstatp.com\\/origin\\/470700000c7084773fb2\"},{\"url\":\"http:\\/\\/pb1.pstatp.com\\/origin\\/470700000c7084773fb2\"}],\"uri\":\"origin\\/470700000c7084773fb2\",\"height\":1590},{\"url\":\"http:\\/\\/p9.pstatp.com\\/origin\\/47050001b69355a0bf1b\",\"width\":1178,\"url_list\":[{\"url\":\"http:\\/\\/p9.pstatp.com\\/origin\\/47050001b69355a0bf1b\"},{\"url\":\"http:\\/\\/pb1.pstatp.com\\/origin\\/47050001b69355a0bf1b\"},{\"url\":\"http:\\/\\/pb3.pstatp.com\\/origin\\/47050001b69355a0bf1b\"}],\"uri\":\"origin\\/47050001b69355a0bf1b\",\"height\":1557},{\"url\":\"http:\\/\\/p3.pstatp.com\\/origin\\/470300020761150d671a\",\"width\":1178,\"url_list\":[{\"url\":\"http:\\/\\/p3.pstatp.com\\/origin\\/470300020761150d671a\"},{\"url\":\"http:\\/\\/pb9.pstatp.com\\/origin\\/470300020761150d671a\"},{\"url\":\"http:\\/\\/pb1.pstatp.com\\/origin\\/470300020761150d671a\"}],\"uri\":\"origin\\/470300020761150d671a\",\"height\":1552},{\"url\":\"http:\\/\\/p1.pstatp.com\\/origin\\/47000002200f2a0a9020\",\"width\":1178,\"url_list\":[{\"url\":\"http:\\/\\/p1.pstatp.com\\/origin\\/47000002200f2a0a9020\"},{\"url\":\"http:\\/\\/pb3.pstatp.com\\/origin\\/47000002200f2a0a9020\"},{\"url\":\"http:\\/\\/pb9.pstatp.com\\/origin\\/47000002200f2a0a9020\"}],\"uri\":\"origin\\/47000002200f2a0a9020\",\"height\":1575},{\"url\":\"http:\\/\\/p1.pstatp.com\\/origin\\/470000022011d5569ccb\",\"width\":1178,\"url_list\":[{\"url\":\"http:\\/\\/p1.pstatp.com\\/origin\\/470000022011d5569ccb\"},{\"url\":\"http:\\/\\/pb3.pstatp.com\\/origin\\/470000022011d5569ccb\"},{\"url\":\"http:\\/\\/pb9.pstatp.com\\/origin\\/470000022011d5569ccb\"}],\"uri\":\"origin\\/470000022011d5569ccb\",\"height\":1588},{\"url\":\"http:\\/\\/p3.pstatp.com\\/origin\\/4700000220127db96444\",\"width\
":1178,\"url_list\":[{\"url\":\"http:\\/\\/p3.pstatp.com\\/origin\\/4700000220127db96444\"},{\"url\":\"http:\\/\\/pb9.pstatp.com\\/origin\\/4700000220127db96444\"},{\"url\":\"http:\\/\\/pb1.pstatp.com\\/origin\\/4700000220127db96444\"}],\"uri\":\"origin\\/4700000220127db96444\",\"height\":1561},{\"url\":\"http:\\/\\/p3.pstatp.com\\/origin\\/46ff000532e33a9fa35a\",\"width\":1178,\"url_list\":[{\"url\":\"http:\\/\\/p3.pstatp.com\\/origin\\/46ff000532e33a9fa35a\"},{\"url\":\"http:\\/\\/pb9.pstatp.com\\/origin\\/46ff000532e33a9fa35a\"},{\"url\":\"http:\\/\\/pb1.pstatp.com\\/origin\\/46ff000532e33a9fa35a\"}],\"uri\":\"origin\\/46ff000532e33a9fa35a\",\"height\":1563},{\"url\":\"http:\\/\\/p9.pstatp.com\\/origin\\/470700000c7b871a5fae\",\"width\":1178,\"url_list\":[{\"url\":\"http:\\/\\/p9.pstatp.com\\/origin\\/470700000c7b871a5fae\"},{\"url\":\"http:\\/\\/pb1.pstatp.com\\/origin\\/470700000c7b871a5fae\"},{\"url\":\"http:\\/\\/pb3.pstatp.com\\/origin\\/470700000c7b871a5fae\"}],\"uri\":\"origin\\/470700000c7b871a5fae\",\"height\":1575}],\"max_img_width\":1178,\"labels\":[],\"sub_abstracts\":[\" \",\" \",\" \",\" \",\" \",\" \",\" \",\" \"],\"sub_titles\":[\"\\u6e05\\u65b0\\u81ea\\u7136\\uff0c\\u7f8e\\u4e3d\\u65e0\\u53cc\",\"\\u6e05\\u65b0\\u81ea\\u7136\\uff0c\\u7f8e\\u4e3d\\u65e0\\u53cc\",\"\\u6e05\\u65b0\\u81ea\\u7136\\uff0c\\u7f8e\\u4e3d\\u65e0\\u53cc\",\"\\u6e05\\u65b0\\u81ea\\u7136\\uff0c\\u7f8e\\u4e3d\\u65e0\\u53cc\",\"\\u6e05\\u65b0\\u81ea\\u7136\\uff0c\\u7f8e\\u4e3d\\u65e0\\u53cc\",\"\\u6e05\\u65b0\\u81ea\\u7136\\uff0c\\u7f8e\\u4e3d\\u65e0\\u53cc\",\"\\u6e05\\u65b0\\u81ea\\u7136\\uff0c\\u7f8e\\u4e3d\\u65e0\\u53cc\",\"\\u6e05\\u65b0\\u81ea\\u7136\\uff0c\\u7f8e\\u4e3d\\u65e0\\u53cc\"]}'.decode('string_escape'))
>>> 
>>> data["count"]
8
>>>

flask無法正常啟動，報錯：'InteractiveInterpreter'相關的錯誤。

雨萌萌回答

最後去該conda虛擬環境的python下找到了code.py和code.pyc，不過code.py改名和刪掉code.pyc並沒有用，因為werkzeug下的debug/console.py是直接引用python環境下的code，我直接把code.py拷貝一份到debug文件夾下就ok了

求分析為什麼源代碼有id，可是用selenium找不到目標

半心人回答

頂上去求解答！

有什麼web測試工具，可以看到訪問速度、內存消耗什麼的？

哎呦喂回答

api測試的話，可以用postman

linux 創建uwsgi軟連接成功，但是在命令行鍵入uwsgi時報錯，請問該如何處理

孤巷回答

解決辦法（64位系統）：

[root@ richie]# ln -s /usr/local/lib/libpcre.so.1 /lib64
[root@ richie]# /usr/bin/uwsgi --ini /usr/local/nginx/conf/uwsgi.ini
[uWSGI] getting INI configuration from /usr/local/nginx/conf/uwsgi.ini