Downloading sounding data from the University of Wyoming
My skills are limited, so corrections and discussion are always welcome! The University of Wyoming shares data from upper-air sounding stations around the world and also computes many derived indices from each profile, which is very useful. I recently needed this data, so for now I have written a quick, unoptimized script to download it.
If you need to download in bulk, you can try a proxy-pool approach, routing requests through rotating proxies and other measures that avoid triggering the site's anti-scraping defenses; a minimal sketch is shown below, before the full script.
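The sketch below only illustrates the proxy idea and is not part of the original script: requests accepts a proxies mapping per call, so rotating proxies simply means picking a different entry for each request. The proxy addresses here are placeholders that you would replace with entries from your own pool.

import random
import requests

# Placeholder proxy entries; substitute addresses from your own proxy pool.
PROXIES = [
    {'http': 'http://127.0.0.1:8080'},
    {'http': 'http://127.0.0.1:8081'},
]

url = ('http://weather.uwyo.edu/cgi-bin/sounding?region=seasia'
       '&TYPE=TEXT%3ALIST&YEAR=2018&MONTH=07&FROM=0312&TO=0312&STNM=54511')
# Each request goes out through a randomly chosen proxy.
resp = requests.get(url, proxies=random.choice(PROXIES), timeout=30)
print(resp.status_code)

The full download script follows.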
import os
import datetime
import calendar
import random
from io import StringIO

import requests
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup

# Example request URLs:
# http://weather.uwyo.edu/cgi-bin/sounding?region=naconf&TYPE=TEXT%3ALIST&YEAR=2018&MONTH=07&FROM=0312&TO=0312&STNM=72558
# http://weather.uwyo.edu/cgi-bin/sounding?region=seasia&TYPE=TEXT%3ALIST&YEAR=2018&MONTH=07&FROM=0312&TO=0312&STNM=54511

# Rotate through several User-Agent strings so requests look less uniform.
USER_AGENTS = [
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
    "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
    "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
]


def main(start_date, end_date, sta_id=54511):
    # Walk through the date range month by month and download every sounding.
    allmonths = pd.date_range(start_date, end_date, freq='M')
    for vdate in allmonths.astype(object):
        handle_html(vdate.year, vdate.month, sta_id)
        print('Finished this month: --->>>', vdate)


def set_url(date, sta_id):
    # One request per month: FROM the 1st at 00Z TO the last day at 12Z.
    day_nums = calendar.monthrange(date.year, date.month)[1]
    return ('http://weather.uwyo.edu/cgi-bin/sounding?region=seasia'
            '&TYPE=TEXT%3ALIST&YEAR={year}&MONTH={month}'
            '&FROM=0100&TO={day_end}12&STNM={sta_id}').format(
        year=date.year,
        month=str(date.month).zfill(2),
        day_end=day_nums,
        sta_id=str(sta_id),
    )


def handle_html(year, month, sta_id=54511):
    now_date = datetime.date(year, month, 1)
    url = set_url(now_date, sta_id)
    header = {'User-Agent': random.choice(USER_AGENTS)}
    content = requests.get(url, headers=header).content
    soup_html(content)


def soup_html(html_page):
    soup = BeautifulSoup(html_page, 'html.parser')
    # Each sounding on the page is an <h2> title followed by two <pre> blocks:
    # the profile table and the derived-index listing.
    headers_1 = soup.find_all('h2')
    details_data = soup.find_all('pre')
    d1s = details_data[::2]   # profile tables
    d2s = details_data[1::2]  # derived indices
    for iheader, vheader in enumerate(headers_1):
        convert_one_time(vheader, d1s[iheader], d2s[iheader])


def convert_one_time(h1, d1, d2, filedir=None):
    # The <h2> text ends with the observation time, e.g. "00Z 01 Jul 2018".
    h1_list = h1.string.split()
    obser_time = pd.to_datetime(' '.join(h1_list[-4:]), format='%HZ %d %b %Y')
    if filedir is None:
        filedir = os.path.join(homedir, 'data', obser_time.strftime('%Y%m'))
    if not os.path.exists(filedir):
        try:
            os.makedirs(filedir)
        except OSError:
            pass
    filename1 = 'sounding_details_{}_{}.csv'.format(
        h1_list[1], obser_time.strftime('%Y%m%d%H'))
    filename2 = 'sounding_indices_{}_{}.csv'.format(
        h1_list[1], obser_time.strftime('%Y%m%d%H'))

    # Profile table: fixed-width text, 11 columns of 7 characters each.
    # NOTE: stripping '-' blanks the dashed separator lines, but it also removes
    # the minus sign from negative values such as sub-zero temperatures.
    detail_data = pd.read_fwf(
        StringIO(d1.string.replace('-', '')),
        widths=[7 for _ in range(11)], skiprows=[1, 4]
    )
    # Merge the variable-name and unit rows into single column labels.
    detail_data.columns = [
        '{}_{}'.format(i, j) for i, j in zip(detail_data.iloc[0], detail_data.iloc[1])
    ]
    detail_data.drop([0, 1], axis=0, inplace=True)
    detail_data.to_csv(os.path.join(filedir, filename1), index=False)

    # Derived indices: "name: value" lines, transposed into a single row.
    index_data = pd.read_csv(
        StringIO(d2.string), delimiter=':', names=['variable', 'value']).T
    index_data.columns = index_data.iloc[0]
    index_data.drop(['variable'], axis=0, inplace=True)
    index_data.to_csv(os.path.join(filedir, filename2), index=False)


if __name__ == '__main__':
    homedir = os.path.dirname(os.path.realpath(__file__))
    print(homedir)
    datadir = os.path.join(homedir, 'data')
    if not os.path.exists(datadir):
        try:
            os.makedirs(datadir)
        except OSError:
            pass
    # main('2000-01-01', '2018-07-01')
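The __main__ block above only prepares the data directory; the download call itself is left commented out. One possible way to run it, kept as a sketch (the date range, station, and month folder are illustrative), is to append something like the following to the end of the `if __name__ == '__main__':` block, where `datadir` is already defined:

import glob

# Download soundings for the default station 54511 from January through July 2018.
# freq='M' in main() yields month-end stamps, so the end date must reach the
# final day of the last month you want included.
main('2018-01-01', '2018-07-31', sta_id=54511)

# List the profile CSVs written for July 2018; each sounding also gets a
# matching sounding_indices_*.csv holding the derived indices.
for path in glob.glob(os.path.join(datadir, '201807', 'sounding_details_*.csv')):
    print(path, pd.read_csv(path).shape)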