源代碼 Parser/Tokenizer.c 進行詞法分析。首先是依照編碼進行解碼。然后通過tok_get 函數(shù)進行詞法分析。
首先處理縮進;
然后是消除空白字符、注釋以及空白行;
再然后是標識符、數(shù)字、字符串;
再然后是續(xù)行符('\'+直接輸入換行);
最后是操作符。
static int
tok_get(struct tok_state *tok, char **p_start, char **p_end)
{
int c;
int blankline, nonascii;
*p_start = *p_end = NULL;
nextline:
tok->start = NULL;
blankline = 0;
/* Get indentation level */
if (tok->atbol) {
int col = 0;
int altcol = 0;
tok->atbol = 0;
for (;;) {
c = tok_nextc(tok);
if (c == ' ') {
col++, altcol++;
}
else if (c == '\t') {
col = (col/tok->tabsize + 1) * tok->tabsize;
altcol = (altcol/tok->alttabsize + 1)
* tok->alttabsize;
}
else if (c == '\014') {/* Control-L (formfeed) */
col = altcol = 0; /* For Emacs users */
}
else {
break;
}
}
tok_backup(tok, c);
if (c == '#' || c == '\n') {
/* Lines with only whitespace and/or comments
shouldn't affect the indentation and are
not passed to the parser as NEWLINE tokens,
except *totally* empty lines in interactive
mode, which signal the end of a command group. */
if (col == 0 && c == '\n' && tok->prompt != NULL) {
blankline = 0; /* Let it through */
}
else {
blankline = 1; /* Ignore completely */
}
/* We can't jump back right here since we still
may need to skip to the end of a comment */
}
if (!blankline && tok->level == 0) {
if (col == tok->indstack[tok->indent]) {
/* No change */
if (altcol != tok->altindstack[tok->indent]) {
if (indenterror(tok)) {
return ERRORTOKEN;
}
}
}
else if (col > tok->indstack[tok->indent]) {
/* Indent -- always one */
if (tok->indent+1 >= MAXINDENT) {
tok->done = E_TOODEEP;
tok->cur = tok->inp;
return ERRORTOKEN;
}
if (altcol <= tok->altindstack[tok->indent]) {
if (indenterror(tok)) {
return ERRORTOKEN;
}
}
tok->pendin++;
tok->indstack[++tok->indent] = col;
tok->altindstack[tok->indent] = altcol;
}
else /* col < tok->indstack[tok->indent] */ {
/* Dedent -- any number, must be consistent */
while (tok->indent > 0 &&
col < tok->indstack[tok->indent]) {
tok->pendin--;
tok->indent--;
}
if (col != tok->indstack[tok->indent]) {
tok->done = E_DEDENT;
tok->cur = tok->inp;
return ERRORTOKEN;
}
if (altcol != tok->altindstack[tok->indent]) {
if (indenterror(tok)) {
return ERRORTOKEN;
}
}
}
}
}
tok->start = tok->cur;
/* Return pending indents/dedents */
if (tok->pendin != 0) {
if (tok->pendin < 0) {
tok->pendin++;
return DEDENT;
}
else {
tok->pendin--;
return INDENT;
}
}
if (tok->async_def
&& !blankline
&& tok->level == 0
/* There was a NEWLINE after ASYNC DEF,
so we're past the signature. */
&& tok->async_def_nl
/* Current indentation level is less than where
the async function was defined */
&& tok->async_def_indent >= tok->indent)
{
tok->async_def = 0;
tok->async_def_indent = 0;
tok->async_def_nl = 0;
}
again:
tok->start = NULL;
/* Skip spaces */
do {
c = tok_nextc(tok);
} while (c == ' ' || c == '\t' || c == '\014');
/* Set start of current token */
tok->start = tok->cur - 1;
/* Skip comment */
if (c == '#') {
while (c != EOF && c != '\n') {
c = tok_nextc(tok);
}
}
/* Check for EOF and errors now */
if (c == EOF) {
return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
}
/* Identifier (most frequent token!) */
nonascii = 0;
if (is_potential_identifier_start(c)) {
/* Process the various legal combinations of b"", r"", u"", and f"". */
int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0;
while (1) {
if (!(saw_b || saw_u || saw_f) && (c == 'b' || c == 'B'))
saw_b = 1;
/* Since this is a backwards compatibility support literal we don't
want to support it in arbitrary order like byte literals. */
else if (!(saw_b || saw_u || saw_r || saw_f)
&& (c == 'u'|| c == 'U')) {
saw_u = 1;
}
/* ur"" and ru"" are not supported */
else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) {
saw_r = 1;
}
else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F')) {
saw_f = 1;
}
else {
break;
}
c = tok_nextc(tok);
if (c == '"' || c == '\'') {
goto letter_quote;
}
}
while (is_potential_identifier_char(c)) {
if (c >= 128) {
nonascii = 1;
}
c = tok_nextc(tok);
}
tok_backup(tok, c);
if (nonascii && !verify_identifier(tok)) {
return ERRORTOKEN;
}
*p_start = tok->start;
*p_end = tok->cur;
/* async/await parsing block. */
if (tok->cur - tok->start == 5) {
/* Current token length is 5. */
if (tok->async_def) {
/* We're inside an 'async def' function. */
if (memcmp(tok->start, "async", 5) == 0) {
return ASYNC;
}
if (memcmp(tok->start, "await", 5) == 0) {
return AWAIT;
}
}
else if (memcmp(tok->start, "async", 5) == 0) {
/* The current token is 'async'.
Look ahead one token.*/
struct tok_state ahead_tok;
char *ahead_tok_start = NULL, *ahead_tok_end = NULL;
int ahead_tok_kind;
memcpy(&ahead_tok, tok, sizeof(ahead_tok));
ahead_tok_kind = tok_get(&ahead_tok, &ahead_tok_start,
&ahead_tok_end);
if (ahead_tok_kind == NAME
&& ahead_tok.cur - ahead_tok.start == 3
&& memcmp(ahead_tok.start, "def", 3) == 0)
{
/* The next token is going to be 'def', so instead of
returning 'async' NAME token, we return ASYNC. */
tok->async_def_indent = tok->indent;
tok->async_def = 1;
return ASYNC;
}
}
}
return NAME;
}
/* Newline */
if (c == '\n') {
tok->atbol = 1;
if (blankline || tok->level > 0) {
goto nextline;
}
*p_start = tok->start;
*p_end = tok->cur - 1; /* Leave '\n' out of the string */
tok->cont_line = 0;
if (tok->async_def) {
/* We're somewhere inside an 'async def' function, and
we've encountered a NEWLINE after its signature. */
tok->async_def_nl = 1;
}
return NEWLINE;
}
/* Period or number starting with period? */
if (c == '.') {
c = tok_nextc(tok);
if (isdigit(c)) {
goto fraction;
} else if (c == '.') {
c = tok_nextc(tok);
if (c == '.') {
*p_start = tok->start;
*p_end = tok->cur;
return ELLIPSIS;
}
else {
tok_backup(tok, c);
}
tok_backup(tok, '.');
}
else {
tok_backup(tok, c);
}
*p_start = tok->start;
*p_end = tok->cur;
return DOT;
}
/* Number */
if (isdigit(c)) {
if (c == '0') {
/* Hex, octal or binary -- maybe. */
c = tok_nextc(tok);
if (c == 'x' || c == 'X') {
/* Hex */
c = tok_nextc(tok);
do {
if (c == '_') {
c = tok_nextc(tok);
}
if (!isxdigit(c)) {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
do {
c = tok_nextc(tok);
} while (isxdigit(c));
} while (c == '_');
}
else if (c == 'o' || c == 'O') {
/* Octal */
c = tok_nextc(tok);
do {
if (c == '_') {
c = tok_nextc(tok);
}
if (c < '0' || c >= '8') {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
do {
c = tok_nextc(tok);
} while ('0' <= c && c < '8');
} while (c == '_');
}
else if (c == 'b' || c == 'B') {
/* Binary */
c = tok_nextc(tok);
do {
if (c == '_') {
c = tok_nextc(tok);
}
if (c != '0' && c != '1') {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
do {
c = tok_nextc(tok);
} while (c == '0' || c == '1');
} while (c == '_');
}
else {
int nonzero = 0;
/* maybe old-style octal; c is first char of it */
/* in any case, allow '0' as a literal */
while (1) {
if (c == '_') {
c = tok_nextc(tok);
if (!isdigit(c)) {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
}
if (c != '0') {
break;
}
c = tok_nextc(tok);
}
if (isdigit(c)) {
nonzero = 1;
c = tok_decimal_tail(tok);
if (c == 0) {
return ERRORTOKEN;
}
}
if (c == '.') {
c = tok_nextc(tok);
goto fraction;
}
else if (c == 'e' || c == 'E') {
goto exponent;
}
else if (c == 'j' || c == 'J') {
goto imaginary;
}
else if (nonzero) {
/* Old-style octal: now disallowed. */
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
}
}
else {
/* Decimal */
c = tok_decimal_tail(tok);
if (c == 0) {
return ERRORTOKEN;
}
{
/* Accept floating point numbers. */
if (c == '.') {
c = tok_nextc(tok);
fraction:
/* Fraction */
if (isdigit(c)) {
c = tok_decimal_tail(tok);
if (c == 0) {
return ERRORTOKEN;
}
}
}
if (c == 'e' || c == 'E') {
int e;
exponent:
e = c;
/* Exponent part */
c = tok_nextc(tok);
if (c == '+' || c == '-') {
c = tok_nextc(tok);
if (!isdigit(c)) {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
} else if (!isdigit(c)) {
tok_backup(tok, c);
tok_backup(tok, e);
*p_start = tok->start;
*p_end = tok->cur;
return NUMBER;
}
c = tok_decimal_tail(tok);
if (c == 0) {
return ERRORTOKEN;
}
}
if (c == 'j' || c == 'J') {
/* Imaginary part */
imaginary:
c = tok_nextc(tok);
}
}
}
tok_backup(tok, c);
*p_start = tok->start;
*p_end = tok->cur;
return NUMBER;
}
letter_quote:
/* String */
if (c == '\'' || c == '"') {
int quote = c;
int quote_size = 1; /* 1 or 3 */
int end_quote_size = 0;
/* Find the quote size and start of string */
c = tok_nextc(tok);
if (c == quote) {
c = tok_nextc(tok);
if (c == quote) {
quote_size = 3;
}
else {
end_quote_size = 1; /* empty string found */
}
}
if (c != quote) {
tok_backup(tok, c);
}
/* Get rest of string */
while (end_quote_size != quote_size) {
c = tok_nextc(tok);
if (c == EOF) {
if (quote_size == 3) {
tok->done = E_EOFS;
}
else {
tok->done = E_EOLS;
}
tok->cur = tok->inp;
return ERRORTOKEN;
}
if (quote_size == 1 && c == '\n') {
tok->done = E_EOLS;
tok->cur = tok->inp;
return ERRORTOKEN;
}
if (c == quote) {
end_quote_size += 1;
}
else {
end_quote_size = 0;
if (c == '\\') {
tok_nextc(tok); /* skip escaped char */
}
}
}
*p_start = tok->start;
*p_end = tok->cur;
return STRING;
}
/* Line continuation */
if (c == '\\') {
c = tok_nextc(tok);
if (c != '\n') {
tok->done = E_LINECONT;
tok->cur = tok->inp;
return ERRORTOKEN;
}
tok->cont_line = 1;
goto again; /* Read next line */
}
/* Check for two-character token */
{
int c2 = tok_nextc(tok);
int token = PyToken_TwoChars(c, c2);
if (token != OP) {
int c3 = tok_nextc(tok);
int token3 = PyToken_ThreeChars(c, c2, c3);
if (token3 != OP) {
token = token3;
}
else {
tok_backup(tok, c3);
}
*p_start = tok->start;
*p_end = tok->cur;
return token;
}
tok_backup(tok, c2);
}
/* Keep track of parentheses nesting level */
switch (c) {
case '(':
case '[':
case '{':
tok->level++;
break;
case ')':
case ']':
case '}':
tok->level--;
break;
}
/* Punctuation character */
*p_start = tok->start;
*p_end = tok->cur;
return PyToken_OneChar(c);
}
rules是Rule的列表,所以你可以放多個規(guī)則不同的Rule
mysql.connector是官方出的,和MySQLdb不一樣。
python3可以使用pymysql兼容MySQLdb
import pymysql
pymysql.install_as_MySQLdb()這條sql有問題吧
sql3 = "update account set (account_fullname, user,account_id_from_media, funds,agency_id,media_id)values(%s,%s,%s,%s,%d,%d) where media_id=4 and account_id_from_media=manage_page_data[i - 9 + 1] ;" % (
manage_page_data[i - 9 + 1] 在引號里,不會被解析吧?
django 的 URL 規(guī)則中, 如果是<int:level>/<int:category>, 那么level和category就必須是正的整形數(shù)字. 不能是負數(shù).
參考URL調(diào)度器:
int - Matches zero or any positive integer. Returns an int.
你這里的level和category的默認值是-1, 不是正數(shù), 因此報錯, 可以嘗試改成默認值為0.
稍優(yōu)化了一點,按你的算法,有n個元素的數(shù)組,要循環(huán)
n * n * in_array里的次數(shù),in_array內(nèi)部也是循環(huán)
var arr = [1, 2, 5, 6, 7];//如果這個數(shù)組不是有序數(shù)組,哪還要先加排序
var len =arr.length
let result=[]
let count=0
for(let a=0;a<len;a++){
let max = arr.pop()
let newlen = arr.length
for(let i=0;i<newlen-1;i++){
if(arr[i]+arr[i+1]> max){
break;
}
for(let j=i;j<newlen-1;j++){
let plus = arr[i]+arr[j+1]
count++
if(plus>max){
break;
}
if(plus==max){
result.push([max,arr[i],arr[j+1]])
}
}
}
}
console.log(result)//輸出結(jié)果
console.log(count)//輸出總循環(huán)次數(shù),
回復里說的好,我沒有考慮負數(shù)的情況,如果要考慮負數(shù),哪把最大數(shù)pop出來,就不行了,只能重新維護一條新數(shù)組,用來枚舉所有值,修改如下
var arr = [-8, -1, 1, 2, 5, 6, 7];//如果這個數(shù)組不是有序數(shù)組,哪還要先加排序
var len =arr.length
var arr1 = [...arr] //復制一條新數(shù)組
let result=[]
let count=0
for(let a=0;a<len;a++){
let max = arr1.pop()// 從新數(shù)組中枚舉各個值。
let newlen = arr.length
for(let i=0;i<newlen-1;i++){
if(arr[i]+arr[i+1]> max){
break;
}
for(let j=i;j<newlen-1;j++){
let plus = arr[i]+arr[j+1]
count++
if(plus>max){
break;
}
if(plus==max){
result.push([max,arr[i],arr[j+1]])
}
}
}
}
console.log(result)//輸出結(jié)果
console.log(count)//輸出總循環(huán)次數(shù),
輸出
[[7, 1, 6], [7, 2, 5], [6, -1, 7], [6, 1, 5], [5, -1, 6], [1, -1, 2], [-1, -8, 7]]如果你dataframe2的index和dataframe1是一致的
dataframe1.drop(dataframe2.index)
別怪我啰嗦,深深的刺痛你。
data = b'你所拿到的全部數(shù)據(jù)'
也就是說,把你的全部數(shù)據(jù)都納入 b'' 中。如果涉及到換行問題,請自行用 Python 來處理即可。
linux打印pdf subprocess.Popen(['lpr', fname])
excel的話,我沒在linux下打印過,不能確定。
localeCompare 可以指定你自己的排序方式,就像1L說的那樣,默認的排序方式是根據(jù)ASCII碼去排序,不一定是你想要的需求,可以自定義,按你自己的需求。
對,就是你的url配置出了問題,嘗試一下這個
from django.urls import re_path
re_path('CetPersInfo/(?P<topic_id>d+)/',views.personal_info,name='personal_info'),其實Django2.0以上的版本都在用這種格式了。
In the current implementation apply calls func twice on the first column/row to decide whether it can take a fast or slow code path. This can lead to unexpected behavior if func has side-effects, as they will take effect twice for the first column/row.
#!/usr/bin/env python3.6
# -*- coding: utf-8 -*-
import re
import requests
from bs4 import BeautifulSoup
headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36'}
def urls():
url = 'https://list.jd.com/list.html?cat=670,671,1105&ev=exbrand_8551&page=1&delivery=1&delivery_daofu=0&sort=sort_totalsales15_desc&trans=1&JL=4_11_0#J_main'
html = requests.get(url,headers=headers)
find_urls = re.findall('a target="_blank" href="(.*?)" ',html.text)[1:]
for find_url in find_urls:
find_text('http:'+find_url)
def find_text(find_url):
html =requests.get(find_url,headers=headers).text
soup = BeautifulSoup(html,'html5lib')
find_texts = soup.find('div',class_='Ptable')
print(find_texts.get_text('\n',strip=True))
print(100*'*')
if __name__ == '__main__':
urls()
文檔上這么舉例的:
$ python -m timeit -s 'text = "sample string"; char = "g"' 'char in text'
10000000 loops, best of 3: 0.0877 usec per loop
$ python -m timeit -s 'text = "sample string"; char = "g"' 'text.find(char)'
1000000 loops, best of 3: 0.342 usec per loop
也就是說,通過命令行方式執(zhí)行,timeit是自動判斷該循環(huán)多少次,我想應該是單次耗時越短,執(zhí)行次數(shù)就多一些。
區(qū)分端口就行了
用你的方法,直接運行,自動登錄沒問題
我推薦使用csv模塊寫入csv文件:
在你這個例子中, 代碼就是這樣:
import csv
from sklearn.datasets import make_blobs
from matplotlib import pyplot
data,target=make_blobs(n_samples=100,n_features=2,centers=3,cluster_std=[1.0,3.0,2.0])
with open('test.csv', 'w', newline='') as csvfile:
writer = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
for point in data:
writer.writerow(point)
pyplot.scatter(data[:,0],data[:,1],c=target);
pyplot.show()
會得到這樣的test.csv:
9.079981991444182,-6.625044790626964
7.573744080944893,-3.8307421246142286
0.18689336599603878,-4.4228616757162555
8.17862194498336,-5.8660242485855765
7.627525274619017,-6.08593822120887
-3.3954682566821948,-7.071216921667429
9.393298715364962,-4.7199985116769如果你有過微信支付開發(fā)經(jīng)歷,這應該是一個很簡單的需求。微信統(tǒng)一下單->用戶支付->驗證支付狀態(tài)->用支付信息交換注冊碼->展示。
細節(jié)就不展開了,只要跑通微信支付基本沒有什么技術(shù)難度
python源碼下載完make完之后就有了
1.編寫爬蟲規(guī)則時,請避免使用parse作為回調(diào)函數(shù)。 由于CrawlSpider使用parse方法來實現(xiàn)其邏輯,如果您覆蓋了parse方法,crawl spider 將會運行失敗
2.第一調(diào)用parse_item,用xpath提取網(wǎng)頁內(nèi)容,然后用Rule提取網(wǎng)頁規(guī)則,在這里提取到2.shtml
3.setting設置:當你使用Scrapy,你必須告訴它你使用哪些設置。您可以通過使用環(huán)境變量來執(zhí)行此操作SCRAPY_SETTINGS_MODULE值SCRAPY_SETTINGS_MODULE應該在Python路徑語法中,例如 myproject.settings。請注意,設置模塊應該在Python 導入搜索路徑上。
北大青鳥APTECH成立于1999年。依托北京大學優(yōu)質(zhì)雄厚的教育資源和背景,秉承“教育改變生活”的發(fā)展理念,致力于培養(yǎng)中國IT技能型緊缺人才,是大數(shù)據(jù)專業(yè)的國家
達內(nèi)教育集團成立于2002年,是一家由留學海歸創(chuàng)辦的高端職業(yè)教育培訓機構(gòu),是中國一站式人才培養(yǎng)平臺、一站式人才輸送平臺。2014年4月3日在美國成功上市,融資1
北大課工場是北京大學校辦產(chǎn)業(yè)為響應國家深化產(chǎn)教融合/校企合作的政策,積極推進“中國制造2025”,實現(xiàn)中華民族偉大復興的升級產(chǎn)業(yè)鏈。利用北京大學優(yōu)質(zhì)教育資源及背
博為峰,中國職業(yè)人才培訓領域的先行者
曾工作于聯(lián)想擔任系統(tǒng)開發(fā)工程師,曾在博彥科技股份有限公司擔任項目經(jīng)理從事移動互聯(lián)網(wǎng)管理及研發(fā)工作,曾創(chuàng)辦藍懿科技有限責任公司從事總經(jīng)理職務負責iOS教學及管理工作。
浪潮集團項目經(jīng)理。精通Java與.NET 技術(shù), 熟練的跨平臺面向?qū)ο箝_發(fā)經(jīng)驗,技術(shù)功底深厚。 授課風格 授課風格清新自然、條理清晰、主次分明、重點難點突出、引人入勝。
精通HTML5和CSS3;Javascript及主流js庫,具有快速界面開發(fā)的能力,對瀏覽器兼容性、前端性能優(yōu)化等有深入理解。精通網(wǎng)頁制作和網(wǎng)頁游戲開發(fā)。
具有10 年的Java 企業(yè)應用開發(fā)經(jīng)驗。曾經(jīng)歷任德國Software AG 技術(shù)顧問,美國Dachieve 系統(tǒng)架構(gòu)師,美國AngelEngineers Inc. 系統(tǒng)架構(gòu)師。