在线观看不卡亚洲电影_亚洲妓女99综合网_91青青青亚洲娱乐在线观看_日韩无码高清综合久久

鍍金池/ 問答/Python/ Linux服務器 下 python 使用 selenium + PhantomJ

Linux服務器 下 python 使用 selenium + PhantomJS 時出錯?

# -*- coding: utf-8 -*-
import time
import json
import tempfile
from PIL import Image
from selenium import webdriver

# The Keys class must be imported in order to send keyboard key presses
from selenium.webdriver.common.keys import Keys


class CreditSpider(object):
    """Interactive scraper that drives PhantomJS through a paged result list.

    For each step the spider clicks a link, screenshots the page, crops the
    captcha area to ``codeImage.png``, asks the operator to type the captcha
    on stdin, submits it and prints the parsed result rows.
    """

    def __init__(self):
        """Start a PhantomJS session and set a fixed window size."""
        # Uses the PhantomJS binary found through the environment; pass
        # executable_path=... to webdriver.PhantomJS() if it is not on PATH.
        self.driver = webdriver.PhantomJS()
        # NOTE(review): jietu_img() crops with absolute pixel coordinates,
        # so this size presumably has to stay in sync with those boxes.
        self.driver.set_window_size(1366, 1098)

    def get_credit(self):
        """Load the start page and run the click/captcha/parse steps."""
        url = "http://hd.chinatax.gov.cn/fagui/action/InitCredit.do"
        self.driver.get(url)
        # Extra settle time after get() returns, before the first click.
        time.sleep(2)
        self.on_click(1)

    def on_click(self, n):
        """Run steps ``n`` up to (and excluding) 3.

        Step 1 clicks the ``changeParam('articleField01','')`` link; every
        later step clicks the element titled "next page". After each click
        the page is screenshotted, the captcha is cropped and submitted.

        Args:
            n: Step number to start from.
        """
        while n < 3:
            if n == 1:
                # XPath must start with '//' (descendant axis); the previous
                # 'http://a[...]' prefix was not a valid XPath expression.
                self.driver.find_element_by_xpath(
                    """//a[@onclick="changeParam('articleField01','')"]"""
                ).click()
            else:
                # NOTE(review): title text kept exactly as in the original;
                # confirm it matches the characters the live page uses.
                self.driver.find_element_by_xpath('//*[@title="下一頁"]').click()
            time.sleep(2)
            self.driver.save_screenshot("yzm.png")
            self.jietu_img(n)
            if self.click_yzm(n) is False:
                # parse_page() already re-ran the remaining steps through a
                # nested on_click(); continuing here would repeat them.
                return
            n += 1
        return

    def click_yzm(self, n):
        """Prompt for the captcha, submit it and parse the resulting page.

        Args:
            n: Current step number (printed, and forwarded to parse_page).

        Returns:
            Whatever ``parse_page(n)`` returns.
        """
        print('the path is {}'.format(n))
        element = self.driver.find_element_by_id("verifyCode")
        # Reuse the element that was just located instead of a second lookup.
        element.clear()
        yzm = input("please input code: ")
        element.send_keys(yzm)
        self.driver.find_element_by_xpath(
            """//*[@id="layui-layer1"]/div[3]/a[1]"""
        ).click()
        time.sleep(4)
        return self.parse_page(n)

    def parse_page(self, n):
        """Collect the result cells of the current page and hand them to save_res.

        When no cells are found the step is retried through ``on_click(n)``
        and nothing is saved for this attempt.

        Args:
            n: Current step number, used for the retry.

        Returns:
            False when no cells were found and the retry was started,
            True otherwise (including when an error was caught and printed).
        """
        try:
            cells = self.driver.find_elements_by_xpath('//td[@class="sv_hei"]//tr/td')
            if not cells:
                self.on_click(n)
                # Do not fall through to save_res() with an empty list.
                return False
            self.save_res([cell.text for cell in cells])
        except Exception as e:
            # Best-effort as in the original: report the error and let the
            # caller move on to the next step.
            print(e)
        return True

    def save_res(self, res_list):
        """Turn the flat list of cell texts into records and print them.

        The first three entries and the last entry are discarded; the rest is
        read in groups of three as NSSBH, NSRMC and YEAR. An incomplete
        trailing group is ignored. ``res_list`` itself is not modified.

        Args:
            res_list: Flat list of cell texts.

        Returns:
            The list of record dicts that was printed.
        """
        # Slicing tolerates lists shorter than four entries, where the
        # original element-wise deletes raised IndexError.
        cells = res_list[3:-1]
        cont_list = []
        # Stop early enough that i + 2 is always a valid index.
        for i in range(0, len(cells) - 2, 3):
            cont_list.append({
                'NSSBH': cells[i],
                'NSRMC': cells[i + 1],
                'YEAR': cells[i + 2],
            })
        print(cont_list)
        return cont_list

    def jietu_img(self, n):
        """Crop the captcha area out of ``yzm.png`` and save it as ``codeImage.png``.

        Args:
            n: Current step number; step 1 uses a crop box two pixels higher
                than the later steps.
        """
        # Box is (left, upper, right, lower) in screenshot pixels.
        if n == 1:
            bbox = (531, 510, 731, 560)
        else:
            bbox = (531, 512, 731, 562)
        # The context manager closes the screenshot file once the crop is saved.
        with Image.open('yzm.png') as png:
            region = png.crop(bbox)  # region is a new image object
            # NOTE(review): delay kept from the original; its purpose is unclear.
            time.sleep(1)
            region.save('codeImage.png')
        return


if __name__ == '__main__':
    # Run the spider once and report the elapsed wall-clock time in seconds.
    s = time.time()
    a = CreditSpider()
    try:
        a.get_credit()
    finally:
        # Always end the WebDriver session so the PhantomJS process does not
        # outlive the script, even when a step raises.
        a.driver.quit()
    print(time.time()-s)

報錯:

Traceback (most recent call last):
  File "credit_spider.py", line 122, in <module>
    a.get_credit()
  File "credit_spider.py", line 31, in get_credit
    self.on_click(1)
  File "credit_spider.py", line 36, in on_click
    self.driver.find_element_by_xpath("""//a[@onclick="changeParam('articleField01','')"]""").click()
  File "/root/anaconda3/lib/python3.6/site-packages/selenium/webdriver/remote/webelement.py", line 80, in click
    self._execute(Command.CLICK_ELEMENT)
  File "/root/anaconda3/lib/python3.6/site-packages/selenium/webdriver/remote/webelement.py", line 501, in _execute
    return self._parent.execute(command, params)
  File "/root/anaconda3/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 308, in execute
    self.error_handler.check_response(response)
  File "/root/anaconda3/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 194, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.ElementNotVisibleException: Message: {"errorMessage":"Element is not currently visible and may not be manipulated","request":{"headers":{"Accept":"application/json","Accept-Encoding":"identity","Connection":"close","Content-Length":"81","Content-Type":"application/json;charset=UTF-8","Host":"127.0.0.1:48854","User-Agent":"Python http auth"},"httpVersion":"1.1","method":"POST","post":"{\"id\": \":wdc:1531296937655\", \"sessionId\": \"9443d970-84e2-11e8-9955-092f7358cfea\"}","url":"/click","urlParsed":{"anchor":"","query":"","file":"click","directory":"/","path":"/click","relative":"/click","port":"","host":"","password":"","user":"","userInfo":"","authority":"","protocol":"","source":"/click","queryKey":{},"chunks":["click"]},"urlOriginal":"/session/9443d970-84e2-11e8-9955-092f7358cfea/element/:wdc:1531296937655/click"}}
Screenshot: available via screen

這是什么原因?求大神指導,謝謝了!!

回答
編輯回答
抱緊我

linux服務器,你的PhantomJS是mac版本?

self.driver = webdriver.PhantomJS(executable_path='/Users/bianyachao/Downloads/phantomjs-2.1.1-macosx/bin/phantomjs')
2017年5月28日 00:01
編輯回答
背叛者

運行了下你的代碼使用 Chrome 運行也報錯, 找不到標簽
你貼出來的錯誤也是找不到標簽, 建議使用 chrome 運行一次,如果 chrome 運行沒問題
改回PhantomJS 運行還報錯,就把 page_source 保存下來看看獲取的頁面結果

# Grab the HTML of the current page and save it to tmp.html for inspection
result = driver.page_source
with open('tmp.html', 'w', encoding='utf-8') as f:
    f.write(result)

剛好前兩天整理的, Linux下跑 PhantomJS... PhantomJS+Selenium爬取數據

2018年7月15日 00:58