From cf7b87029f10e5b7c1e408f2107d4a89da0d2afd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=AB=98=E5=9D=9A=E6=9E=9C?= <753610399@qq.com> Date: Thu, 29 Nov 2018 00:53:31 +0800 Subject: [PATCH] =?UTF-8?q?=E7=A7=BB=E9=99=A4=E6=B2=A1=E6=9C=89=E4=B8=8B?= =?UTF-8?q?=E7=BA=A7=E9=93=BE=E6=8E=A5=E7=9A=84=E2=80=9C=E5=B8=82=E8=BE=96?= =?UTF-8?q?=E5=8C=BA=E2=80=9D=EF=BC=8C=E4=BF=AE=E5=A4=8D4=E4=B8=AA?= =?UTF-8?q?=E7=9F=AD=E5=90=8D=E4=BC=9A=E5=90=8C=E5=90=8D=E5=B8=82=E5=8E=BF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...16\345\270\202\344\277\241\346\201\257.js" | 259 +++++++++++++ ...23\345\217\226\346\213\274\351\237\263.js" | 343 ++++++++++++++++++ 2 files changed, 602 insertions(+) create mode 100644 "2018/1_\346\212\223\345\217\226\345\233\275\345\256\266\347\273\237\350\256\241\345\261\200\345\237\216\345\270\202\344\277\241\346\201\257.js" create mode 100644 "2018/2_\346\212\223\345\217\226\346\213\274\351\237\263.js" diff --git "a/2018/1_\346\212\223\345\217\226\345\233\275\345\256\266\347\273\237\350\256\241\345\261\200\345\237\216\345\270\202\344\277\241\346\201\257.js" "b/2018/1_\346\212\223\345\217\226\345\233\275\345\256\266\347\273\237\350\256\241\345\261\200\345\237\216\345\270\202\344\277\241\346\201\257.js" new file mode 100644 index 0000000..fc94e65 --- /dev/null +++ "b/2018/1_\346\212\223\345\217\226\345\233\275\345\256\266\347\273\237\350\256\241\345\261\200\345\237\216\345\270\202\344\277\241\346\201\257.js" @@ -0,0 +1,259 @@ +/* +获取城市名称http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/index.html +*/ +(function(){ +if(!window.URL){ + throw new Error("浏览器版本太低"); +}; +function ajax(url,True,False){ + var ajax=new XMLHttpRequest(); + ajax.timeout=1000; + ajax.open("GET",url); + ajax.onreadystatechange=function(){ + if(ajax.readyState==4){ + if(ajax.status==200){ + True(ajax.responseText); + }else{ + False(); + } + } + } + ajax.send(); +} +function msg(){ + console.log.apply(console, arguments); +} + +function cityClass(name,url,code){ + this.name=name; + this.url=url; + this.code=code; + this.child=[]; + this.tryCount=0; +} +cityClass.prototype={ + getValue:function(){ + var obj={name:this.name,code:this.code,child:[]}; + for(var i=0;i(.+?)
"))+1){ + reg.lastIndex=idx; + while(match=reg.exec(text)){ + var url=match[1]; + if(url.indexOf("//")==-1 && url.indexOf("/")!=0){ + url=path+"/"+url; + } + var name=match[2]; + DATA.push(new cityClass(name,url,0)); + } + True(); + }else{ + msg("未发现省份数据"); + } + },function(){ + msg("读取省份列表出错","程序终止"); + }); +} +function load_shen(True, False){ + var city=DATA[JD.shen]; + city.tryCount++; + if(city.tryCount>3){ + msg("读取省份["+city.name+"]超过3次"); + False(); + return; + }; + + function get(){ + msg("读取省份["+city.name+"]", getJD()); + save(); + + city.child[JD.si].tryCount=0; + load_si(function(){ + JD.shen++; + if(JD.shen>=DATA.length){ + JD.shen=0; + True(); + return; + }; + DATA[JD.shen].tryCount=0; + + load_shen(True,False); + },function(){ + False(); + }); + } + + if(city.child.length){ + get(); + }else{ + ajax(city.url,function(text){ + var reg=/.+?href='(.+?)'>(.+?)<.+?'>(.+?)3){ + msg("读取城市["+city.name+"]超过3次"); + False(); + return; + }; + + + function get(){ + msg("___读取城市["+city.name+"]", getJD()); + + city.child[JD.xian].tryCount=0; + JD.si++; + if(JD.si>=shen.child.length){ + JD.si=0; + True(); + return; + }; + shen.child[JD.si].tryCount=0; + + load_si(True,False); + } + + if(city.child.length){ + get(); + }else{ + ajax(city.url,function(text){ + var reg=/class='(?:countytr|towntr)'.+?<\/tr>/ig; + var match; + while(match=reg.exec(text)){ + var reg2=/class='(?:countytr|towntr)'.+?(?:(.+?)<.+?'>(.+?)<|(.+?)<.+?(.+?)<)/ig; + var match2; + if(match2=reg2.exec(match[0])){ + var url=match2[1]||""; + if(url && url.indexOf("//")==-1 && url.indexOf("/")!=0){ + url=city.url.substring(0,city.url.lastIndexOf("/"))+"/"+url; + } + var code=match2[2]||match2[4]; + var name=match2[3]||match2[5]; + if(!url&&name=="市辖区"){ + //NOOP + }else{ + city.child.push(new cityClass(name,url,code)); + }; + }else{ + msg("未知城市模式:"); + msg(city.url); + msg(match[0]); + throw new Error("end"); + } + } + + JD.xian=0; + get(); + },function(){ + load_si(True,False); + }); + }; +} + + +function getJD(){ + var str="省:"+(JD.shen+1)+"/"+DATA.length; + var shen=DATA[JD.shen]; + if(shen){ + str+=" 市:"+(JD.si+1)+"/"+shen.child.length; + var si=shen.child[JD.si]; + if(si){ + str+=" 县:"+(JD.xian+1)+"/"+si.child.length; + }else{ + str+=" 县:"+JD.xian; + } + }else{ + str+=" 市:"+JD.si+" 县:"+JD.xian; + } + return str; +} +function save(){ + +} + +var DATA=[]; +var JD; +window.RunLoad=function(shen,si,xian){ + RunLoad.T1=Date.now(); + JD={ + shen:shen||0 + ,si:si||0 + ,xian:xian||0 + } + + function get(){ + DATA[JD.shen].tryCount=0; + load_shen(function(){ + console.log("完成:"+(Date.now()-RunLoad.T1)/1000+"秒"); + save(); + + var data=[]; + for(var i=0;i2){ + name=name.replace(/甯$/ig,""); + }; + }else{ + if(name.length>2 + && !/^(?:甯傝緰鍖簗(?:涓村|鍜岀敯|浼婂畞)[甯傚幙])$/.test(name) + && !/(鑷不.|鍦板尯|鐭垮尯|寮鍙戝尯)$/.test(name)){//鐩存帴鎺掗櫎浼氭湁鍚屽悕鐨 + name=name.replace(/(甯倈鍖簗鍘縷闀噟绠″浼殀琛楅亾鍔炰簨澶)$/ig,""); + }; + }; + var o2={ + name:name + ,ext_name:o.name + ,id:+o.code||0 + ,ext_id:+o.orgCode + ,pid:p&&+p.code||0 + ,deep:o.deep + }; + o.o2=o2; + return o2; + }; + var datas=[]; + if(CITY_LIST2){ + datas=CITY_LIST2; + }else{ + for(var i=0;i=1950){ + j++; + o={txt:"",refs:[]}; + }; + ids[j]=o; + o.txt=o.txt+(o.txt?"\n":"")+keys[i]; + o.refs.push(keyMp[keys[i]]); + }; + console.log("闇瑕佹煡璇"+(ids.length)+"娈垫暟鎹",ids); + + + + var idx=-1,sendCount=0; + var run=function(stack){ + stack=+stack||0; + idx++; + if(idx>=ids.length){ + thread--; + if(thread==0){ + end(); + }; + return; + }; + + var idx_=idx; + var id=ids[idx]; + var name=id.txt; + var tryLoad=function(){ + sendCount++; + $.ajax({ + url:"/zh/pinyin/show.php?pid="+proxyID + ,data:"t="+encodeURIComponent(name)+"&d=1&s=null&k=1&b=null&h=null&u=null&v=1&y=null&z=null&token="+PageToken + ,type:"POST" + ,dataType:"text" + ,timeout:40000 + ,error:function(e){ + console.error("--QueryPinYin error--",idx_,id,e); + idx=999999999; + QueryPinyinErrs++; + RunPinYin(); + } + ,success:function(html){ + QueryPinyinErrs=0; + var arr=html.replace(/<\/div>/,"").trim().split("
"); + var refs=id.refs; + if(arr.length!=refs.length){ + console.error("鏃犳晥鏌ヨ锛岃繑鍥炴暟閲忎笉瀵癸紝宸插仠姝細"+refs.length+":"+arr.length,idx_); + console.log(id); + console.log(arr); + return; + }; + var count=0; + for(var i=0;i]+(".*?")?)+>/g,"").replace(/\s+/g," ")); + for(var j=0;j3){ + QueryPinyinErrs=0; + queryProxy(); + return; + }; + console.log("鎶撳彇token"); + $.ajax({ + url:"/zh/pinyin/?pid="+proxyID + ,timeout:8000 + ,error:function(e){ + console.error("鎶撳彇token澶辫触锛屽皾璇曞垏鎹witchyOmega浠g悊"); + queryProxy(); + } + ,success:function(txt){ + PageToken=/&token=(\w+)/.exec(txt)[1]; + console.log("token:"+PageToken); + + RunPinYin.T1=Date.now(); + QueryPinYin(ViewDown); + } + }); +}; + + +RunPinYin(); + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +//浠g悊鏂规硶 +//瑁呬笂SwitchyOmega 鏂板缓PAC鎯呮櫙妯″紡锛屽~鍏ヤ互涓嬩唬鐮 +var proxyList=[ + //姝ゅ濉啓閲囬泦鍒扮殑浠g悊鍒楄〃 +]; +function FindProxyForURL(url, host) { + if (/qqxiuzi\.cn.+pid=(\d+)/.test(url) || /TB1_uT8a5ERMeJjSspiXXbZLFXa-143-59\.png\?pid=(\d+)/.test(url)){ + var id=+RegExp.$1; + return "PROXY "+(proxyList[id]||"127.0.0.1:12345"); + }; + return "DIRECT"; +}; + + + +//浠g悊鍒楄〃鎶撳彇 +//http://www.xicidaili.com/nt/ 鍐呮墽琛 +var ntdata={}; +var ntrun=function(page){ + function ajax(url,True,False){ + var ajax=new XMLHttpRequest(); + ajax.open("GET",url); + ajax.onreadystatechange=function(){ + if(ajax.readyState==4){ + if(ajax.status==200){ + True(ajax.responseText); + }else{ + False(); + } + } + } + ajax.send(); + }; + var mx=20; + if(page>mx){ + var list=Object.keys(ntdata); + console.log("閲囬泦瀹屾垚"+list.length); + console.log(JSON.stringify(list)); + return; + }; + ajax("http://www.xicidaili.com/nt/"+page,function(str){ + console.log("宸叉姄鍙"+page+"/"+mx); + str=str.replace(/\s+/g," "); + + str=str.substr(str.indexOf("ip_list")); + str=str.substr(0,str.indexOf("")).replace(//i,""); + + var exp=/.+?(.+?)<\/td>.+?(.+?)<\/td>/ig,m; + while(m=exp.exec(str)){ + var k=m[1]+":"+m[2]; + ntdata[k]=(ntdata[k]||0)+1; + } + ntrun(++page); + },function(){ + console.error("鎶撳彇澶辫触"+page); + }); +}; +//ntrun(1); \ No newline at end of file