From 919e5fdf2f43a50a819da279db626a22329153eb Mon Sep 17 00:00:00 2001 From: geoyee Date: Thu, 11 Nov 2021 15:07:08 +0800 Subject: [PATCH 1/3] Init code --- projects/Baidu_POI_crawl/README.md | 21 +++ projects/Baidu_POI_crawl/main.py | 42 +++++ projects/Baidu_POI_crawl/requirements.txt | 3 + projects/Baidu_POI_crawl/util.py | 187 ++++++++++++++++++++++ 4 files changed, 253 insertions(+) create mode 100644 projects/Baidu_POI_crawl/README.md create mode 100644 projects/Baidu_POI_crawl/main.py create mode 100644 projects/Baidu_POI_crawl/requirements.txt create mode 100644 projects/Baidu_POI_crawl/util.py diff --git a/projects/Baidu_POI_crawl/README.md b/projects/Baidu_POI_crawl/README.md new file mode 100644 index 000000000..71c3d5667 --- /dev/null +++ b/projects/Baidu_POI_crawl/README.md @@ -0,0 +1,21 @@ +# Script Title + +Crawl the POI in the specified area through Baidu map API. + +### Prerequisites + +Modules required to be able to use the script successfully +and how to install them. +(Including a `requirements.txt` file will work.) + +### How to run the script + +Steps on how to run the script along with suitable examples. + +### Screenshot/GIF showing the sample use of the script + +Add a jpeg/png/gif file here. + +## *Author Name* + +[YiZhou Chen](https://github.com/geoyee) diff --git a/projects/Baidu_POI_crawl/main.py b/projects/Baidu_POI_crawl/main.py new file mode 100644 index 000000000..a2f9ecfdc --- /dev/null +++ b/projects/Baidu_POI_crawl/main.py @@ -0,0 +1,42 @@ +import os +import os.path as osp +from util import * + + +## ---------- config ---------- +# 百度AK +baidu_web_ak = 'your AK' +# 范围(左下点经纬度,右上点经纬度,x经度,y纬度) +wgs_l_x = 105.824149 +wgs_l_y = 28.524360 +wgs_r_x = 111.659451 +wgs_r_y = 31.730663 +# 滑动窗口大小(默认0.5效果不错) +kernel_x = 0.5 +kernel_y = 0.5 +# 索引号 +rec_index = 1 +# 兴趣区关键字 +roi_key = '桥' +# 保存目录 +output = 'output' + + +# # 新建文件夹 +# ! 
mkdir -p output +# 获取百度坐标系下的研究区范围 +rec_index -= 1 +l_x, l_y = wgs84_to_baidu(wgs_l_x, wgs_l_y, baidu_web_ak) +r_x, r_y = wgs84_to_baidu(wgs_r_x, wgs_r_y, baidu_web_ak) +print('左下点经纬度:', l_x, l_y) +print('右上点经纬度:', r_x, r_y) +num_x = math.ceil((r_x - l_x) / kernel_x) +num_y = math.ceil((r_y - l_y) / kernel_y) +num_rec = num_x * num_y +print('网格数:', num_rec) +for idx in range(rec_index, num_rec): + rec_str = get_rectangle(l_x, l_y, r_x, r_y, kernel_x, kernel_y, idx) + print('第', (idx+1), '块网格,当前区域坐标:', rec_str) + get_baidu_poi(roi_key, rec_str, baidu_web_ak, idx, output) + print('当前区域完成') + time.sleep(1) \ No newline at end of file diff --git a/projects/Baidu_POI_crawl/requirements.txt b/projects/Baidu_POI_crawl/requirements.txt new file mode 100644 index 000000000..22b449b71 --- /dev/null +++ b/projects/Baidu_POI_crawl/requirements.txt @@ -0,0 +1,3 @@ +urllib +requests +json \ No newline at end of file diff --git a/projects/Baidu_POI_crawl/util.py b/projects/Baidu_POI_crawl/util.py new file mode 100644 index 000000000..b837d80a4 --- /dev/null +++ b/projects/Baidu_POI_crawl/util.py @@ -0,0 +1,187 @@ +from urllib.request import urlopen, quote +import requests +import json +import time +import math + + +# WGS84转Baidu坐标系 +def wgs84_to_baidu(x, y, baidu_ak): + ''' + inputs: + x: WGS84下的经度 + y: WGS84下的纬度 + baidu_ak: 百度web API的AK秘钥 + outputs: + tuple: 百度坐标系下的经纬度 + ''' + data = str(x) + ',' + str(y) + url = 'http://api.map.baidu.com/geoconv/v1/?coords=' + data + '&from=1&to=5&ak=' + baidu_ak + req = urlopen(url) + res = req.read().decode() + temp = json.loads(res) + baidu_x = 0 + baidu_y = 0 + if temp['status'] == 0: + baidu_x = temp['result'][0]['x'] + baidu_y = temp['result'][0]['y'] + else: + print(temp['message']) + return (baidu_x, baidu_y) + + +# 获取当前小范围区域 +def get_rectangle(l_x, l_y, r_x, r_y, kernel_x, kernel_y, index): + ''' + inputs: + l_x: 百度坐标系下的左下角点经度 + l_y: 百度坐标系下的左下角点纬度 + r_x: 百度坐标系下的右上角点经度 + r_y: 百度坐标系下的右上角点纬度 + kernel_x: 经度方向核大小 + kernel_y: 纬度方向核大小 + index: 
当前位置索引 + outputs: + string: 用于API的滑动窗口范围(左下右上模式) + ''' + num_x = math.ceil((r_x - l_x) / kernel_x) + num_y = math.ceil((r_y - l_y) / kernel_y) + left_x = l_x + kernel_x * (index % num_x) + left_y = l_y + kernel_y * (index // num_x) + right_x = (left_x + kernel_x) + right_y = (left_y + kernel_y) + rec_str = str(left_y) + ',' + str(left_x) + ',' + str(right_y) + ',' + str(right_x) # 这里返回是纬度,经度,要符合百度的API要求 + return rec_str + + +# Baidu系坐标到WGS84 +x_pi = 3.14159265358979324 * 3000.0 / 180.0 +pi = 3.1415926535897932384626 # π +a = 6378245.0 # 长半轴 +ee = 0.00669342162296594323 # 偏心率平方 + + +def bd09_to_gcj02(bd_lon, bd_lat): + ''' + inputs: + bd_lat: 百度坐标纬度 + bd_lon: 百度坐标经度 + return: 转换后的坐标列表形式 + ''' + x = bd_lon - 0.0065 + y = bd_lat - 0.006 + z = math.sqrt(x * x + y * y) - 0.00002 * math.sin(y * x_pi) + theta = math.atan2(y, x) - 0.000003 * math.cos(x * x_pi) + gg_lng = z * math.cos(theta) + gg_lat = z * math.sin(theta) + return [gg_lng, gg_lat] + + +def gcj02_to_wgs84(lng, lat): + ''' + inputs: + lng: 火星坐标系经度 + lat: 火星坐标系纬度 + return: 转换后的坐标列表形式 + ''' + if out_of_china(lng, lat): + return [lng, lat] + dlat = _transformlat(lng - 105.0, lat - 35.0) + dlng = _transformlng(lng - 105.0, lat - 35.0) + radlat = lat / 180.0 * pi + magic = math.sin(radlat) + magic = 1 - ee * magic * magic + sqrtmagic = math.sqrt(magic) + dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi) + dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi) + mglat = lat + dlat + mglng = lng + dlng + return [lng * 2 - mglng, lat * 2 - mglat] + + +def bd09_to_wgs84(bd_lon, bd_lat): + lon, lat = bd09_to_gcj02(bd_lon, bd_lat) + return gcj02_to_wgs84(lon, lat) + + +def _transformlat(lng, lat): + ret = -100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat + \ + 0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng)) + ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 * + math.sin(2.0 * lng * pi)) * 2.0 / 3.0 + ret += (20.0 * math.sin(lat * pi) + 40.0 * + math.sin(lat / 3.0 * pi)) * 2.0 / 3.0 + ret += 
(160.0 * math.sin(lat / 12.0 * pi) + 320 * + math.sin(lat * pi / 30.0)) * 2.0 / 3.0 + return ret + + +def _transformlng(lng, lat): + ret = 300.0 + lng + 2.0 * lat + 0.1 * lng * lng + \ + 0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng)) + ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 * + math.sin(2.0 * lng * pi)) * 2.0 / 3.0 + ret += (20.0 * math.sin(lng * pi) + 40.0 * + math.sin(lng / 3.0 * pi)) * 2.0 / 3.0 + ret += (150.0 * math.sin(lng / 12.0 * pi) + 300.0 * + math.sin(lng / 30.0 * pi)) * 2.0 / 3.0 + return ret + + +# 判断是否在国内,不在国内不做偏移 +def out_of_china(lng, lat): + ''' + inputs: + lng: 火星坐标系经度 + lat: 火星坐标系纬度 + return: 转换后坐标 + ''' + return not (lng > 73.66 and lng < 135.05 and lat > 3.86 and lat < 53.55) + + +# 对小窗口调用百度API +def get_baidu_poi(roi_key, rec_str, baidu_ak, index, output): + ''' + inputs: + roi_key: 兴趣区关键字 + rec_str: 滑动小区域坐标 + baidu_ak: 百度web API的AK秘钥 + index: 滑动小窗索引 + output: 文件保存位置 + ''' + now_time = time.strftime("%Y-%m-%d") + page_num = 0 + logfile = open(output + '/' + now_time + ".log", 'a+', encoding='utf-8') + file = open(output + '/' + now_time + ".txt", 'a+', encoding='utf-8') + while True: + try: + URL = "http://api.map.baidu.com/place/v2/search?query=" + roi_key + \ + "&bounds=" + rec_str + \ + "&output=json" + \ + "&ak=" + baidu_ak + \ + "&scope=2" + \ + "&page_size=20" + \ + "&page_num=" + str(page_num) + resp = requests.get(URL) + res = json.loads(resp.text) + # print('获取兴趣区') + if len(res['results']) == 0: + logfile.writelines(time.strftime("%Y-%m-%d-%H-%M-%S") + " stop " + str(index) + " " + rec_str + " " + str(page_num) + '\n') + break + else: + for r in res['results']: + j_name = r['name'] # 获取名称 + # 获取经纬度 + j_lat = r['location']['lat'] + j_lon = r['location']['lng'] + j_area = r['area'] # 获取行政区 + j_add = r['address'] # 获取具体地址 + j_lon, j_lat = bd09_to_wgs84(j_lon, j_lat) # 坐标转换 + j_str = str(j_name) + ',' + str(j_lon) + ',' + str(j_lat) + ',' + str(j_area) + ',' + str(j_add) + '\n' + file.writelines(j_str) + page_num += 1 + 
time.sleep(1) + except: + print("except") + logfile.writelines(time.strftime("%Y-%m-%d-%H-%M-%S") + " except " + str(index) + " " + rec_str + " " + str(page_num) + '\n') + break \ No newline at end of file From 84908804242dcee35cd9ad0c5be333d22d431c14 Mon Sep 17 00:00:00 2001 From: geoyee Date: Fri, 12 Nov 2021 10:32:21 +0800 Subject: [PATCH 2/3] disable rectangle search --- projects/Baidu_POI_crawl/README.md | 13 +- .../__pycache__/util.cpython-37.pyc | Bin 0 -> 5292 bytes projects/Baidu_POI_crawl/main.py | 76 +++++------ projects/Baidu_POI_crawl/util.py | 122 +++++++----------- 4 files changed, 99 insertions(+), 112 deletions(-) create mode 100644 projects/Baidu_POI_crawl/__pycache__/util.cpython-37.pyc diff --git a/projects/Baidu_POI_crawl/README.md b/projects/Baidu_POI_crawl/README.md index 71c3d5667..72987972f 100644 --- a/projects/Baidu_POI_crawl/README.md +++ b/projects/Baidu_POI_crawl/README.md @@ -1,21 +1,26 @@ # Script Title + Crawl the POI in the specified area through Baidu map API. ### Prerequisites -Modules required to be able to use the script successfully -and how to install them. -(Including a `requirements.txt` file will work.) + +1. `pip install -r requirements.txt` +2. Log in to [Baidu map open platform](https://lbsyun.baidu.com/apiconsole/key#/home), creating web API and record AK. ### How to run the script + -Steps on how to run the script along with suitable examples. + +1. `cd python-mini-projects\projects\Baidu_POI_crawl` +2. `python main.py --ak yours_ak --range x y x y --poi poi_name` ### Screenshot/GIF showing the sample use of the script Add a jpeg/png/gif file here. 
## *Author Name* + [YiZhou Chen](https://github.com/geoyee) diff --git a/projects/Baidu_POI_crawl/__pycache__/util.cpython-37.pyc b/projects/Baidu_POI_crawl/__pycache__/util.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f80dadf797cb6fdef453ea1046e2c0d8d99fb05f GIT binary patch literal 5292 zcmb7IO>7)j9iQ)=UGJ_x;xYxNt(6TWJsU0z%?Yg$uv`o7v6Ub%4(3 zz5oAx%=`F%|L2)XrKI4w@aMO@o#Tq~J~al92BeM>zln+|Ol>P2)m2r(wYKJJ66&rl zq2U@5ny$%oX1t-e7BiUzSYQR$X0}^oMYqIC?id?mW%QR>g;fD7Y@AI1R@o$*0vu<@ z*fih-yN}%uILVH)8Nex~y{yzue2CL1H8m~tcsuI);gzONJ$UFUgnu3-{uU~sB+4z7 zsmBRZZvpPn_Oo3(csr82C{$ z3U4mnd~9)5ejl&&C+609)LD6KPDCq@&v{!bTm=(cD;A#U#i_;pxYrh`x|6EAX`$oC zu^0HYLaN7tbCR~y;J(LFi}}rn`KhrPM`3EVBag+Y+2yTJ)JojML~hY+dvO2MV4mAHn%(#`MJtN^x49 zP;IrWmQ(|88LcU;tnyGTUAhJo}WY#Vv#l%4WSW-$%z_QSiQmzVGQY2N7YM~|N5qwOlNF}L2 zTs7|_a8F9YVTo-wq|9#n+-dvk!nx7&xbXQS@j*eBv~Z3VE)x89gM>ReYMBv<|6i+c zI!v$I#bJ0W*bvx|hoAf~oDOsJGN0fuTR!%df$K*!fiKdU$HS-*9Ft zWIpcL>~Zdgq_$|v!H?-T^r#gE%TC;GL7xL>8*`&=hkIe*JL?gb#^Ll@Bt+E7Sm%Yz z5m7fMT?blMlS^~5qcuG>I-b}_ji%pfbDO%VsgCjO!k8Z*kCmZ(ev)N^&zrj7U;OdrZ+_HY4eoq%^Vi}#JFCGzH{O5qmmmG> zLh#B56F1^V-@Qk~kyqEp1PObvoDsTgUWM3K0;5C3kGV4w@jWm6%`z3P^5Z zq(DRjK{bZC0?t}kGq8bV4X2fCg;T^#JsVFf^cnlGj>HIBAEVe!1_K405P<<)y25A| z8}?R}SYbwy46?3}Bte#3DN>5^`o#gVUjLBX-G?MswNhG!b~d8*Msow6HLWxRv@nJa zcHnvsG{Yd(x-H33PGX&EzRw>fMD_SAEmnlOVU~_B5oF~xF^zlFO3jWJw3=>_#34h? zPC(NLgu!8^RwXlutI};<#uo8OR0_P54P}~={%Qg(2OK7~;*Vo=$YJtTWF91k$;}No zd>x#Gx{&YEZ;?2wS8AG+4!Ji5NYI`D93F#? 
zKrU1s<8MG1^fi(0kAb12r_3Oj<0g#_`bnpy219#E5WsW|B4nXYXAlF3hLLG@-X~Wc z^kHRK0}{|DN4`tc!8o&Nom_6oQ&CKLdiledat;JGA z`69?&)AYWl2IgdNeOTz&8vgf+&{rOP9PG@b)er zbqT~a%ZMHX)V35r*m2obkvu77C3AA6zx=qnviVYe+xNav z58mrf|LnQaQIWXe=o;Vm7J_UMQ`GGf22?tK4kb@y3Tl%iQe0!dNy7)m)y zCRqh|6=fU=r=D0cnl4K3Rm9FEVZmFKHf`WzqL?7sRuxe~8B5041f`3hnZ`dz3j1Yw z-bn-+Q*td`M(M}$Jpl*nvuO%PROLk_{NB!Yvvn>hUwd<}oRrypNJlGx_ajw34!oM< zGr-4zp8%WyJP9}{D#>_Ky`{m5kMC8neg)?_C3hN^yJvWH1fLkeCnb*4>`f*1emR+< zQ{DLcRVA_515htAtyT5_^@CzMBC@>xaVU;cS_S+&kNL@Z_Po1kSxz$^!Rg zXC0}h6MHv(r;9LKZq&DDug%YP=4Y8qp6{rOePf{=1%1u?Y~SpO_4)Jo_hI`XGGv7K z7!*6Azj)6)z1#Mh{^HH0#n|_Fb7S>J&*!@V*tRdwDK9%?c&keS^|=jK)p; zT3K1@SLV9N(HoS;tSp`Fm$D{8`V~a%GM$h6lY5`MSfF^0urnRM_+F_2pRPSfv-zU^d)bEP((8lpw(AURl< zm_fgPWLlEi$ohMJEaFs0p1{Yj1~1b>_zAEhc!f68IvvDNewdcFc}w{1R_Mnvq)5%U z?fc!@F(el zPWefya@X2y5CvRwvmv80fGDJ1d3*HY{xo>6tS1vZNbhS{FkVKb6zPY;Nxb;2P&kWF zt&A|uKo)dPHMA=5vh1-DH9d_&&_aDqFwajQ&CybH| z`Kv-ERs7S`B+t(uqv|YG6#C2HP7+c&b?H8&HI{pgm~H81WHM(zja0MO_AkIPD>3;E ZI=_4k`-FX*ewN_X?UFrVn|2wm`5!!R;Ryf$ literal 0 HcmV?d00001 diff --git a/projects/Baidu_POI_crawl/main.py b/projects/Baidu_POI_crawl/main.py index a2f9ecfdc..a6af39a51 100644 --- a/projects/Baidu_POI_crawl/main.py +++ b/projects/Baidu_POI_crawl/main.py @@ -1,42 +1,46 @@ import os import os.path as osp -from util import * +import math +import time +from util import wgs84_to_baidu, get_rectangle, get_baidu_poi +import argparse -## ---------- config ---------- -# 百度AK -baidu_web_ak = 'your AK' -# 范围(左下点经纬度,右上点经纬度,x经度,y纬度) -wgs_l_x = 105.824149 -wgs_l_y = 28.524360 -wgs_r_x = 111.659451 -wgs_r_y = 31.730663 -# 滑动窗口大小(默认0.5效果不错) -kernel_x = 0.5 -kernel_y = 0.5 -# 索引号 -rec_index = 1 -# 兴趣区关键字 -roi_key = '桥' -# 保存目录 -output = 'output' +def run(args): + baidu_web_ak = args.ak + wgs_l_x, wgs_l_y, wgs_r_x, wgs_r_y = args.range + kernel_x, kernel_y = args.ksize + rec_index = args.idx + roi_key = args.poi + output = args.save + if not osp.exists(output): + os.makedirs(output) + rec_index -= 1 + l_x, l_y = 
wgs84_to_baidu(wgs_l_x, wgs_l_y, baidu_web_ak) + r_x, r_y = wgs84_to_baidu(wgs_r_x, wgs_r_y, baidu_web_ak) + print("lonlat of the upper right point: ", l_x, l_y) + print("lonlat of the lower left point: ", r_x, r_y) + num_x = math.ceil((r_x - l_x) / kernel_x) + num_y = math.ceil((r_y - l_y) / kernel_y) + num_rec = num_x * num_y + print("number of grids: ", num_rec) + for idx in range(rec_index, num_rec): + rec_str = get_rectangle(l_x, l_y, r_x, r_y, kernel_x, kernel_y, idx) + print("No ", (idx+1), ", current area coordinates: ", rec_str) + get_baidu_poi(roi_key, rec_str, baidu_web_ak, idx, output) + print("current area completed") + time.sleep(1) -# # 新建文件夹 -# ! mkdir -p output -# 获取百度坐标系下的研究区范围 -rec_index -= 1 -l_x, l_y = wgs84_to_baidu(wgs_l_x, wgs_l_y, baidu_web_ak) -r_x, r_y = wgs84_to_baidu(wgs_r_x, wgs_r_y, baidu_web_ak) -print('左下点经纬度:', l_x, l_y) -print('右上点经纬度:', r_x, r_y) -num_x = math.ceil((r_x - l_x) / kernel_x) -num_y = math.ceil((r_y - l_y) / kernel_y) -num_rec = num_x * num_y -print('网格数:', num_rec) -for idx in range(rec_index, num_rec): - rec_str = get_rectangle(l_x, l_y, r_x, r_y, kernel_x, kernel_y, idx) - print('第', (idx+1), '块网格,当前区域坐标:', rec_str) - get_baidu_poi(roi_key, rec_str, baidu_web_ak, idx, output) - print('当前区域完成') - time.sleep(1) \ No newline at end of file +parser = argparse.ArgumentParser(description="input parameters") +parser.add_argument("--ak", type=str, required=True, help="Baidu web ak") +parser.add_argument("--range", type=float,nargs='+', required=True, help="Latlon of the lower left point and latlon of the upper right point") +parser.add_argument("--poi", type=str, required=True, help="POI key") +parser.add_argument("--save", type=str, default="output", help="Save path") +parser.add_argument("--ksize", type=float, nargs='+', default=[0.5, 0.5]) +parser.add_argument("--idx", type=int, default=1) + + +if __name__ == "__main__": + args = parser.parse_args() + run(args) \ No newline at end of file diff --git 
a/projects/Baidu_POI_crawl/util.py b/projects/Baidu_POI_crawl/util.py index b837d80a4..a8c6b07c2 100644 --- a/projects/Baidu_POI_crawl/util.py +++ b/projects/Baidu_POI_crawl/util.py @@ -1,73 +1,66 @@ -from urllib.request import urlopen, quote +from urllib.request import urlopen import requests import json import time import math -# WGS84转Baidu坐标系 +# coordinate system: WGS842Baidu def wgs84_to_baidu(x, y, baidu_ak): - ''' + """ inputs: - x: WGS84下的经度 - y: WGS84下的纬度 - baidu_ak: 百度web API的AK秘钥 + x: longitude in WGS84 + y: latitude in WGS84 + baidu_ak: baidu web API AK outputs: - tuple: 百度坐标系下的经纬度 - ''' - data = str(x) + ',' + str(y) - url = 'http://api.map.baidu.com/geoconv/v1/?coords=' + data + '&from=1&to=5&ak=' + baidu_ak + tuple: lonlat in baidu coordinate system + """ + data = str(x) + "," + str(y) + url = "http://api.map.baidu.com/geoconv/v1/?coords=" + data + "&from=1&to=5&ak=" + baidu_ak req = urlopen(url) res = req.read().decode() temp = json.loads(res) baidu_x = 0 baidu_y = 0 - if temp['status'] == 0: - baidu_x = temp['result'][0]['x'] - baidu_y = temp['result'][0]['y'] + if temp["status"] == 0: + baidu_x = temp["result"][0]["x"] + baidu_y = temp["result"][0]["y"] else: - print(temp['message']) + print(temp["message"]) return (baidu_x, baidu_y) -# 获取当前小范围区域 +# gets the current small area def get_rectangle(l_x, l_y, r_x, r_y, kernel_x, kernel_y, index): - ''' + """ inputs: - l_x: 百度坐标系下的左下角点经度 - l_y: 百度坐标系下的左下角点纬度 - r_x: 百度坐标系下的右上角点经度 - r_y: 百度坐标系下的右上角点纬度 - kernel_x: 经度方向核大小 - kernel_y: 纬度方向核大小 - index: 当前位置索引 + l_x: lower left quarter"s longitude in baidu coordinate system + l_y: lower left quarter"s latitude in baidu coordinate system + r_x: upper right corner"s longitude in baidu coordinate system + r_y: upper right corner"s latitude in baidu coordinate system + kernel_x: kernel size in longitude + kernel_y: kernel size in latitude + index: current index outputs: - string: 用于API的滑动窗口范围(左下右上模式) - ''' + string: sliding window range for API (bottom left and 
top right mode) + """ num_x = math.ceil((r_x - l_x) / kernel_x) num_y = math.ceil((r_y - l_y) / kernel_y) left_x = l_x + kernel_x * (index % num_x) left_y = l_y + kernel_y * (index // num_x) right_x = (left_x + kernel_x) right_y = (left_y + kernel_y) - rec_str = str(left_y) + ',' + str(left_x) + ',' + str(right_y) + ',' + str(right_x) # 这里返回是纬度,经度,要符合百度的API要求 + rec_str = str(left_y) + "," + str(left_x) + "," + str(right_y) + "," + str(right_x) # latitude, longitude return rec_str -# Baidu系坐标到WGS84 x_pi = 3.14159265358979324 * 3000.0 / 180.0 -pi = 3.1415926535897932384626 # π -a = 6378245.0 # 长半轴 -ee = 0.00669342162296594323 # 偏心率平方 +pi = 3.1415926535897932384626 +a = 6378245.0 +ee = 0.00669342162296594323 def bd09_to_gcj02(bd_lon, bd_lat): - ''' - inputs: - bd_lat: 百度坐标纬度 - bd_lon: 百度坐标经度 - return: 转换后的坐标列表形式 - ''' x = bd_lon - 0.0065 y = bd_lat - 0.006 z = math.sqrt(x * x + y * y) - 0.00002 * math.sin(y * x_pi) @@ -78,12 +71,6 @@ def bd09_to_gcj02(bd_lon, bd_lat): def gcj02_to_wgs84(lng, lat): - ''' - inputs: - lng: 火星坐标系经度 - lat: 火星坐标系纬度 - return: 转换后的坐标列表形式 - ''' if out_of_china(lng, lat): return [lng, lat] dlat = _transformlat(lng - 105.0, lat - 35.0) @@ -128,31 +115,24 @@ def _transformlng(lng, lat): return ret -# 判断是否在国内,不在国内不做偏移 def out_of_china(lng, lat): - ''' - inputs: - lng: 火星坐标系经度 - lat: 火星坐标系纬度 - return: 转换后坐标 - ''' return not (lng > 73.66 and lng < 135.05 and lat > 3.86 and lat < 53.55) -# 对小窗口调用百度API +# call API for small window def get_baidu_poi(roi_key, rec_str, baidu_ak, index, output): - ''' + """ inputs: - roi_key: 兴趣区关键字 - rec_str: 滑动小区域坐标 - baidu_ak: 百度web API的AK秘钥 - index: 滑动小窗索引 - output: 文件保存位置 - ''' + roi_key: poi name + rec_str: coordinate of sliding window + baidu_ak: baidu web API AK + index: index of sliding window + output: file save path + """ now_time = time.strftime("%Y-%m-%d") page_num = 0 - logfile = open(output + '/' + now_time + ".log", 'a+', encoding='utf-8') - file = open(output + '/' + now_time + ".txt", 'a+', 
encoding='utf-8') + logfile = open(output + "/" + now_time + ".log", "a+", encoding="utf-8") + file = open(output + "/" + now_time + ".txt", "a+", encoding="utf-8") while True: try: URL = "http://api.map.baidu.com/place/v2/search?query=" + roi_key + \ @@ -164,24 +144,22 @@ def get_baidu_poi(roi_key, rec_str, baidu_ak, index, output): "&page_num=" + str(page_num) resp = requests.get(URL) res = json.loads(resp.text) - # print('获取兴趣区') - if len(res['results']) == 0: - logfile.writelines(time.strftime("%Y-%m-%d-%H-%M-%S") + " stop " + str(index) + " " + rec_str + " " + str(page_num) + '\n') + if len(res["results"]) == 0: + logfile.writelines(time.strftime("%Y-%m-%d-%H-%M-%S") + " stop " + str(index) + " " + rec_str + " " + str(page_num) + "\n") break else: - for r in res['results']: - j_name = r['name'] # 获取名称 - # 获取经纬度 - j_lat = r['location']['lat'] - j_lon = r['location']['lng'] - j_area = r['area'] # 获取行政区 - j_add = r['address'] # 获取具体地址 - j_lon, j_lat = bd09_to_wgs84(j_lon, j_lat) # 坐标转换 - j_str = str(j_name) + ',' + str(j_lon) + ',' + str(j_lat) + ',' + str(j_area) + ',' + str(j_add) + '\n' + for r in res["results"]: + j_name = r["name"] + j_lat = r["location"]["lat"] + j_lon = r["location"]["lng"] + j_area = r["area"] + j_add = r["address"] + j_lon, j_lat = bd09_to_wgs84(j_lon, j_lat) + j_str = str(j_name) + "," + str(j_lon) + "," + str(j_lat) + "," + str(j_area) + "," + str(j_add) + "\n" file.writelines(j_str) page_num += 1 time.sleep(1) except: print("except") - logfile.writelines(time.strftime("%Y-%m-%d-%H-%M-%S") + " except " + str(index) + " " + rec_str + " " + str(page_num) + '\n') + logfile.writelines(time.strftime("%Y-%m-%d-%H-%M-%S") + " except " + str(index) + " " + rec_str + " " + str(page_num) + "\n") break \ No newline at end of file From 58f08df821604df82a48c2c32fc87a573c5b77f2 Mon Sep 17 00:00:00 2001 From: geoyee Date: Wed, 17 Nov 2021 17:40:09 +0800 Subject: [PATCH 3/3] Update BPC --- projects/Baidu_POI_crawl/README.md | 10 +- 
.../__pycache__/util.cpython-38.pyc | Bin 0 -> 1447 bytes projects/Baidu_POI_crawl/main.py | 29 +--- .../Baidu_POI_crawl/output/2021-11-17.log | 1 + .../Baidu_POI_crawl/output/2021-11-17.txt | 54 ++++++++ projects/Baidu_POI_crawl/requirements.txt | 1 - projects/Baidu_POI_crawl/util.py | 130 +----------------- 7 files changed, 73 insertions(+), 152 deletions(-) create mode 100644 projects/Baidu_POI_crawl/__pycache__/util.cpython-38.pyc create mode 100644 projects/Baidu_POI_crawl/output/2021-11-17.log create mode 100644 projects/Baidu_POI_crawl/output/2021-11-17.txt diff --git a/projects/Baidu_POI_crawl/README.md b/projects/Baidu_POI_crawl/README.md index 72987972f..9dc7ff0e0 100644 --- a/projects/Baidu_POI_crawl/README.md +++ b/projects/Baidu_POI_crawl/README.md @@ -1,7 +1,7 @@ # Script Title -Crawl the POI in the specified area through Baidu map API. +Crawl the POI in the city through Baidu map API. ### Prerequisites @@ -14,11 +14,15 @@ Crawl the POI in the specified area through Baidu map API. 1. `cd python-mini-projects\projects\Baidu_POI_crawl` -2. `python main.py --ak yours_ak --range x y x y --poi poi_name` +2. `python main.py --ak yours_ak --city city_name --poi poi_name` ### Screenshot/GIF showing the sample use of the script + -Add a jpeg/png/gif file here. 
+ +![image-20211117172514622](https://user-images.githubusercontent.com/71769312/142175449-294daf40-413a-43df-aa3a-8d99a203afa9.png) + +![UXGOS$6WMD)`{XQ$8YK}7WU](https://user-images.githubusercontent.com/71769312/142175459-8f10d1c4-5c5d-4754-9fd5-d5ec58a79081.png) ## *Author Name* diff --git a/projects/Baidu_POI_crawl/__pycache__/util.cpython-38.pyc b/projects/Baidu_POI_crawl/__pycache__/util.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ce59c01d50dfe75d4fd20af2dd1c90fde36028ae GIT binary patch literal 1447 zcma)6OLOBy5S|%5JW=etc!XqO(E+D$fo*CpP_7NI;ZdYumo2syh{}{}qM0~KWJ!@m zn?xm_vKJ0i?SUWQ9O1xk;0NrH(_A_2Par&d)P3^)%~ z2TlMRU<+W2t4JE9bq4t{mFC1MDV#bd?MbbiJJ8&bbEU(V*3cQ|yL3eq{8#8=yhA&kdIv6jPwTV+gj~E^OR$BbVy9jO)P3HNHX?I6k z80nv^^HF03F*a+&oiuH!Tg28ZIb=O%IBMr+Nwa;iR6s7lgvi*dyU4Ozv0d zf;M;B%~89_Q}~e31@0DRCya*$VQ&>yF8A8+wXmWh@?#!ELrBUKS#4wmMk^x1VXT$`&WSNaVd%Lzyzrd*P1vDrQ1Ikuo4wG!>vC zlbJGxLMl@?uBCvmX zDSLV$9*yGTfj-=$)g^#xuLh&gQ&Xn)p7HRjFFX@8B+3ALi`olo(=ezynRPM)7dlW=LJ4xdM;mfRlO}46! 
z?EaVA13zWQVLz8a*hx-R18!t6RRSMvP5&W%4GvKUn|K|s6BFB*{7bYE@((e{-<6r< zFJh5DiA^quZC{u*2iNfueiKtt#}v5s6yL6J0cudlr7l!%ndu@xx~$#2cCL0>*o=D} XLPc+nVxEWMeLd_9{*RHaNzMNNLUMf> literal 0 HcmV?d00001 diff --git a/projects/Baidu_POI_crawl/main.py b/projects/Baidu_POI_crawl/main.py index a6af39a51..f7c562005 100644 --- a/projects/Baidu_POI_crawl/main.py +++ b/projects/Baidu_POI_crawl/main.py @@ -1,44 +1,25 @@ import os import os.path as osp -import math -import time -from util import wgs84_to_baidu, get_rectangle, get_baidu_poi +from util import get_baidu_poi import argparse def run(args): baidu_web_ak = args.ak - wgs_l_x, wgs_l_y, wgs_r_x, wgs_r_y = args.range - kernel_x, kernel_y = args.ksize - rec_index = args.idx + city_str = args.city roi_key = args.poi output = args.save if not osp.exists(output): os.makedirs(output) - rec_index -= 1 - l_x, l_y = wgs84_to_baidu(wgs_l_x, wgs_l_y, baidu_web_ak) - r_x, r_y = wgs84_to_baidu(wgs_r_x, wgs_r_y, baidu_web_ak) - print("lonlat of the upper right point: ", l_x, l_y) - print("lonlat of the lower left point: ", r_x, r_y) - num_x = math.ceil((r_x - l_x) / kernel_x) - num_y = math.ceil((r_y - l_y) / kernel_y) - num_rec = num_x * num_y - print("number of grids: ", num_rec) - for idx in range(rec_index, num_rec): - rec_str = get_rectangle(l_x, l_y, r_x, r_y, kernel_x, kernel_y, idx) - print("No ", (idx+1), ", current area coordinates: ", rec_str) - get_baidu_poi(roi_key, rec_str, baidu_web_ak, idx, output) - print("current area completed") - time.sleep(1) + get_baidu_poi(roi_key, city_str, baidu_web_ak, output) + print("current area completed") parser = argparse.ArgumentParser(description="input parameters") parser.add_argument("--ak", type=str, required=True, help="Baidu web ak") -parser.add_argument("--range", type=float,nargs='+', required=True, help="Latlon of the lower left point and latlon of the upper right point") +parser.add_argument("--city", type=str, required=True, help="City name") parser.add_argument("--poi", type=str, 
required=True, help="POI key") parser.add_argument("--save", type=str, default="output", help="Save path") -parser.add_argument("--ksize", type=float, nargs='+', default=[0.5, 0.5]) -parser.add_argument("--idx", type=int, default=1) if __name__ == "__main__": diff --git a/projects/Baidu_POI_crawl/output/2021-11-17.log b/projects/Baidu_POI_crawl/output/2021-11-17.log new file mode 100644 index 000000000..938f3a7af --- /dev/null +++ b/projects/Baidu_POI_crawl/output/2021-11-17.log @@ -0,0 +1 @@ +2021-11-17-17-36-18 成都 3 diff --git a/projects/Baidu_POI_crawl/output/2021-11-17.txt b/projects/Baidu_POI_crawl/output/2021-11-17.txt new file mode 100644 index 000000000..277fcc33c --- /dev/null +++ b/projects/Baidu_POI_crawl/output/2021-11-17.txt @@ -0,0 +1,54 @@ +四川大学(望江校区),104.090633,30.637031,武侯区,四川省成都市武侯区一环路南一段24号 +四川大学(江安校区),104.005145,30.562814,双流区,成都市双流区川大路二段2号 +电子科技大学(沙河校区),104.107198,30.681868,成华区,四川省成都市成华区建设北路二段4号 +成都大学,104.196613,30.656051,龙泉驿区,四川省成都市龙泉驿区成洛大道2025号 +西南民族大学(武侯校区),104.055946,30.645411,武侯区,四川省成都市武侯区一环路南四段16号 +西南财经大学(柳林校区),103.827675,30.687832,温江区,成都市温江区柳台大道555号 +西南交通大学(九里校区),104.059439,30.704977,金牛区,四川省成都市金牛区二环路北一段111号 +电子科技大学(清水河校区),103.937404,30.756035,郫都区,四川省成都市高新区西源大道2006号 +西南交通大学(犀浦校区),103.993214,30.770399,郫都区,四川省成都市郫都区犀安路999号 +成都中医药大学(十二桥校区),104.050309,30.672574,金牛区,四川省成都市金牛区十二桥路37号 +四川农业大学(都江堰校区),103.629275,31.009812,都江堰市,四川省成都市都江堰市建设路288号 +四川大学(华西校区),104.075894,30.646763,武侯区,成都市武侯区人民南路三段17号 +成都艺术职业大学,103.892092,30.493563,新津区,四川省成都市新津区花源街道白云大道115号 +电子科技大学(九里堤校区),104.055669,30.716153,金牛区,成都市金牛区九里堤西路8号 +电子科技大学继续教育学院,104.103193,30.679693,成华区,四川省成都市成华区一环路东一段240号 +电子科技大学沙河校区-逸夫楼,104.109665,30.680913,成华区,成都市成华区建设北路二段4号电子科技大学沙河校区 +成都大学-四川抗菌素工业研究所,104.177919,30.694508,成华区,成都市成华区华冠路168号 +我的大学,104.197398,30.828142,新都区,成都市新都区同仁路199号 +成都中医药大学附属医院,104.048468,30.673511,金牛区,成都市金牛区十二桥路39-41号 +电子科技大学西区科技园,103.98074,30.739837,郫都区,四川省成都市郫都区天辰路88号 +成都大学-图书馆,104.195601,30.656236,龙泉驿区,四川省成都市龙泉驿区十陵镇成洛大道 +成都广播电视大学继续教育学院(建设北路一段),104.102742,30.677484,成华区,成都市成华区建设北路一段7号 
+西南石油大学新体测中心,104.194813,30.838617,新都区,成都市新都区鸿运大道东段西南石油大学(成都校区) +成都理工大学东苑-9栋,104.158206,30.688295,成华区,成都市成华区民智巷理工东苑-西区 +成都广播电视大学直属城东学院-教学楼1号楼,104.102347,30.677874,成华区,成都市成华区建设北路一段7号 +电子科技大学附属实验小学(沙河校区),104.10353,30.683807,成华区,四川省成都市成华区府青路2段-3号-青1号 +成都大学附属中学校,104.098172,30.688535,成华区,四川省成都市成华区府青路街道三友路135号 +成都理工大学附属小学,104.153179,30.696257,成华区,四川省成都市成华区民兴东路62号 +西南财经大学,104.442003,30.862562,金堂县,成都市金堂县幸福横街百合苑(幸福横路) +电子科技大学实验幼儿园,104.10855,30.684714,成华区,成都市成华区建设北路二段5号东院沙河缘15号 +四川师范大学附属天府欧城幼稚园,104.261685,30.898376,青白江区,成都市青白江区同华大道与新河路交叉路口往西北约260米 +西南石油大学学生公寓-4号楼,104.19334,30.829785,新都区,四川省成都市新都区大学路160号 +西南石油大学教工41幢,104.188074,30.83157,新都区,成都市新都区蜀龙大道北段香城学府 +西南石油大学教工42幢,104.188467,30.831576,新都区,成都市新都区蜀龙大道北段香城学府 +西南石油大学(成都校区)教工宿舍-12幢,104.188587,30.830054,新都区,成都市新都区南环路香城学府 +西南石油大学材料科学与工程学院,104.190255,30.837702,新都区,成都市新都区蜀龙大道北段西南石油大学(成都校区) +西南石油大学教工35幢,104.187797,30.832034,新都区,成都市新都区蜀龙大道北段香城学府 +成都医学院第一附属医院-大学生宿舍,104.165693,30.836846,新都区,成都市新都区新新街二巷成都医学院第一附属医院北侧 +成都医学院第一附属医院大学生宿舍-33幢,104.165218,30.836607,新都区,四川省成都市新都区成都医学院第一附属医院大学生宿舍33幢 +电子科技大学医院,104.11028,30.68007,成华区,成都市成华区建设北路二段4号电子科技大学沙河校区 +西南石油大学(成都校区)教工宿舍-2幢,104.186624,30.828824,新都区,成都市新都区嘉陵路西南石油大学(成都校区)教工宿舍2幢 +四川农业大学都江堰校区-教职工住宅第5幢,103.630269,31.010702,都江堰市,四川省成都市都江堰市柳岸路附近四川农业大学都江堰校区教职工住宅第5幢 +四川农业大学都江堰校区第一教学楼-侧楼,103.627828,31.010159,都江堰市,四川省成都市都江堰市建设路288号 +四川农业大学都江堰校区学生公寓第-1幢,103.630687,31.009693,都江堰市,四川省成都市都江堰市观景路41号附近四川农业大学都江堰校区学生公寓第1幢 +四川农业大学-第二林业勘察设计研究所,103.626795,31.010544,都江堰市,成都市都江堰市建设路288号 +西华大学老川东食品科研中心,104.221373,30.822995,新都区,成都市新都区君跃路四川老川东食品有限公司 +西南石油大学成都校区油气钻井技术国家工程实验室钻头研究室,104.190204,30.836932,新都区,成都市新都区蜀龙大道北段西南石油大学(成都校区) +四川师范大学附属田童幼儿园,103.630805,30.97236,都江堰市,成都市都江堰市幸福镇灌温路78号 +四川农业大学都江堰校区-教职工住宅第14幢,103.628016,31.012494,都江堰市,成都市都江堰市建设路288号四川农业大学(都江堰校区) +四川西南交通大学希望学院-图书馆,104.471273,30.85726,金堂县,四川省成都市金堂县学府路8号 +四川农业大学都江堰校区-教职工住宅第7幢,103.630133,31.010203,都江堰市,成都市都江堰市建设路288号四川农业大学(都江堰校区) +西南石油大学教工-28幢,104.187065,30.832293,新都区,成都市新都区西南石油大学(成都校区)教工宿舍28幢 +西华大学彭州校区-女生公寓,103.949394,30.98662,彭州市,成都市彭州市南大街168号 
+四川农业大学都江堰校区-研究生公寓,103.632825,31.010149,都江堰市,成都市都江堰市平武巷柳岸公寓东南门南侧约90米 diff --git a/projects/Baidu_POI_crawl/requirements.txt b/projects/Baidu_POI_crawl/requirements.txt index 22b449b71..0b37a21e8 100644 --- a/projects/Baidu_POI_crawl/requirements.txt +++ b/projects/Baidu_POI_crawl/requirements.txt @@ -1,3 +1,2 @@ -urllib requests json \ No newline at end of file diff --git a/projects/Baidu_POI_crawl/util.py b/projects/Baidu_POI_crawl/util.py index a8c6b07c2..b00c3dc71 100644 --- a/projects/Baidu_POI_crawl/util.py +++ b/projects/Baidu_POI_crawl/util.py @@ -1,132 +1,15 @@ -from urllib.request import urlopen import requests import json import time -import math - - -# coordinate system: WGS842Baidu -def wgs84_to_baidu(x, y, baidu_ak): - """ - inputs: - x: longitude in WGS84 - y: latitude in WGS84 - baidu_ak: baidu web API AK - outputs: - tuple: lonlat in baidu coordinate system - """ - data = str(x) + "," + str(y) - url = "http://api.map.baidu.com/geoconv/v1/?coords=" + data + "&from=1&to=5&ak=" + baidu_ak - req = urlopen(url) - res = req.read().decode() - temp = json.loads(res) - baidu_x = 0 - baidu_y = 0 - if temp["status"] == 0: - baidu_x = temp["result"][0]["x"] - baidu_y = temp["result"][0]["y"] - else: - print(temp["message"]) - return (baidu_x, baidu_y) - - -# gets the current small area -def get_rectangle(l_x, l_y, r_x, r_y, kernel_x, kernel_y, index): - """ - inputs: - l_x: lower left quarter"s longitude in baidu coordinate system - l_y: lower left quarter"s latitude in baidu coordinate system - r_x: upper right corner"s longitude in baidu coordinate system - r_y: upper right corner"s latitude in baidu coordinate system - kernel_x: kernel size in longitude - kernel_y: kernel size in latitude - index: current index - outputs: - string: sliding window range for API (bottom left and top right mode) - """ - num_x = math.ceil((r_x - l_x) / kernel_x) - num_y = math.ceil((r_y - l_y) / kernel_y) - left_x = l_x + kernel_x * (index % num_x) - left_y = l_y + kernel_y * 
(index // num_x) - right_x = (left_x + kernel_x) - right_y = (left_y + kernel_y) - rec_str = str(left_y) + "," + str(left_x) + "," + str(right_y) + "," + str(right_x) # latitude, longitude - return rec_str - - -x_pi = 3.14159265358979324 * 3000.0 / 180.0 -pi = 3.1415926535897932384626 -a = 6378245.0 -ee = 0.00669342162296594323 - - -def bd09_to_gcj02(bd_lon, bd_lat): - x = bd_lon - 0.0065 - y = bd_lat - 0.006 - z = math.sqrt(x * x + y * y) - 0.00002 * math.sin(y * x_pi) - theta = math.atan2(y, x) - 0.000003 * math.cos(x * x_pi) - gg_lng = z * math.cos(theta) - gg_lat = z * math.sin(theta) - return [gg_lng, gg_lat] - - -def gcj02_to_wgs84(lng, lat): - if out_of_china(lng, lat): - return [lng, lat] - dlat = _transformlat(lng - 105.0, lat - 35.0) - dlng = _transformlng(lng - 105.0, lat - 35.0) - radlat = lat / 180.0 * pi - magic = math.sin(radlat) - magic = 1 - ee * magic * magic - sqrtmagic = math.sqrt(magic) - dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi) - dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi) - mglat = lat + dlat - mglng = lng + dlng - return [lng * 2 - mglng, lat * 2 - mglat] - - -def bd09_to_wgs84(bd_lon, bd_lat): - lon, lat = bd09_to_gcj02(bd_lon, bd_lat) - return gcj02_to_wgs84(lon, lat) - - -def _transformlat(lng, lat): - ret = -100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat + \ - 0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng)) - ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 * - math.sin(2.0 * lng * pi)) * 2.0 / 3.0 - ret += (20.0 * math.sin(lat * pi) + 40.0 * - math.sin(lat / 3.0 * pi)) * 2.0 / 3.0 - ret += (160.0 * math.sin(lat / 12.0 * pi) + 320 * - math.sin(lat * pi / 30.0)) * 2.0 / 3.0 - return ret - - -def _transformlng(lng, lat): - ret = 300.0 + lng + 2.0 * lat + 0.1 * lng * lng + \ - 0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng)) - ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 * - math.sin(2.0 * lng * pi)) * 2.0 / 3.0 - ret += (20.0 * math.sin(lng * pi) + 40.0 * - math.sin(lng / 3.0 * pi)) * 2.0 
/ 3.0 - ret += (150.0 * math.sin(lng / 12.0 * pi) + 300.0 * - math.sin(lng / 30.0 * pi)) * 2.0 / 3.0 - return ret - - -def out_of_china(lng, lat): - return not (lng > 73.66 and lng < 135.05 and lat > 3.86 and lat < 53.55) -# call API for small window -def get_baidu_poi(roi_key, rec_str, baidu_ak, index, output): +# call API +def get_baidu_poi(roi_key, city_str, baidu_ak, output): """ inputs: roi_key: poi name - rec_str: coordinate of sliding window + city_str: city name baidu_ak: baidu web API AK - index: index of sliding window output: file save path """ now_time = time.strftime("%Y-%m-%d") @@ -136,7 +19,7 @@ def get_baidu_poi(roi_key, rec_str, baidu_ak, index, output): while True: try: URL = "http://api.map.baidu.com/place/v2/search?query=" + roi_key + \ - "&bounds=" + rec_str + \ + "&region=" + city_str + \ "&output=json" + \ "&ak=" + baidu_ak + \ "&scope=2" + \ @@ -145,7 +28,7 @@ def get_baidu_poi(roi_key, rec_str, baidu_ak, index, output): resp = requests.get(URL) res = json.loads(resp.text) if len(res["results"]) == 0: - logfile.writelines(time.strftime("%Y-%m-%d-%H-%M-%S") + " stop " + str(index) + " " + rec_str + " " + str(page_num) + "\n") + logfile.writelines(time.strftime("%Y-%m-%d-%H-%M-%S") + " " + city_str + " " + str(page_num) + "\n") break else: for r in res["results"]: @@ -154,12 +37,11 @@ def get_baidu_poi(roi_key, rec_str, baidu_ak, index, output): j_lon = r["location"]["lng"] j_area = r["area"] j_add = r["address"] - j_lon, j_lat = bd09_to_wgs84(j_lon, j_lat) j_str = str(j_name) + "," + str(j_lon) + "," + str(j_lat) + "," + str(j_area) + "," + str(j_add) + "\n" file.writelines(j_str) page_num += 1 time.sleep(1) except: print("except") - logfile.writelines(time.strftime("%Y-%m-%d-%H-%M-%S") + " except " + str(index) + " " + rec_str + " " + str(page_num) + "\n") + logfile.writelines(time.strftime("%Y-%m-%d-%H-%M-%S") + " " + city_str + " " + str(page_num) + "\n") break \ No newline at end of file