失眠网,内容丰富有趣,生活中的好帮手!
失眠网 > python小象学院: JSON文件 /网络爬虫/Pandas-----空气质量描述

python小象学院: JSON文件 /网络爬虫/Pandas-----空气质量描述

时间:2023-09-23 08:44:26

相关推荐

python小象学院:  JSON文件 /网络爬虫/Pandas-----空气质量描述

博客推荐:

/article/7251318624/#70_AQI_312

功能1.0 AQI计算

""""auther:Susanfunction:AQI Calculationversion:v1.0data:/4/27"""def cal_linear(iaqi_lo,iaqi_hi,bp_lo,bp_hi,cp):"""Range scaling"""iaqi = (iaqi_hi-iaqi_lo)*(cp-bp_lo)/(bp_hi-bp_lo)+iaqi_loreturn iaqidef cal_pm_iaqi(pm_val):"""Calculation:pm2.5_IAQI"""if 0 <= pm_val < 35:iaqi = cal_linear(0, 50, 0, 35, pm_val)elif 35 <= pm_val <75:iaqi = cal_linear(50, 100, 35, 75,pm_val)elif 75 <= pm_val <115:iaqi = cal_linear(100, 150, 75, 115,pm_val)elif 115 <= pm_val <150:iaqi = cal_linear(150, 200, 115, 150,pm_val)else:passdef cal_co_iapi(co_val):"""Calculation:CO_IAQI"""# global iaqiif 0 <= co_val < 3:iaqi = cal_linear(0, 50, 0, 3, co_val)elif 3 <= co_val <5:iaqi = cal_linear(50, 100, 2, 4,co_val)elif 5 <= co_val <15:iaqi = cal_linear(100, 150,5, 14,co_val)else:passdef cal_api(param_list):"""AQI Calculation"""pm_val = param_list[0]co_val = param_list[1]pm_iaqi = cal_pm_iaqi(pm_val)co_iaqi = cal_co_iapi(co_val)iaqi_list = []iaqi_list.append(pm_iaqi)iaqi_list.append(co_iaqi)aqi = max(iaqi_list)return aqidef main():print('Please enter this information,and separate by spaces.')input_str = input('(1)PM2.5: (2)CO:')str_list = input_str.split(' ')pm_val = float(str_list[0])co_val = float(str_list[1])param_list = []param_list.append(pm_val)param_list.append(co_val)"""Transfer function of AQI Calculation"""aqi_val = cal_api(param_list)print('Air quality index value: {} '.format(aqi_val))if __name__ == '__main__':main()

功能2.0 JSON读取

# -*- coding:utf-8 -*-
"""
author: Susan
function: JSON reading
version: v2.0
"""
import json


def process_json_file(filepath):
    """Load a JSON file and return the parsed city list.

    Bug fix: the original opened the file and never closed it; the
    context manager guarantees the handle is released.
    """
    with open(filepath, mode='r', encoding='utf-8') as f:
        return json.load(f)


def main():
    """Read a city list, save the 5 cities with the lowest AQI, print all."""
    filepath = input('Please input a json filemane:')
    city_list = process_json_file(filepath)
    city_list.sort(key=lambda city: city['aqi'])
    top5_list = city_list[:5]
    # ensure_ascii=False keeps Chinese city names readable in the output file.
    with open('top5_aqi.json', mode='w', encoding='utf-8') as f:
        json.dump(top5_list, f, ensure_ascii=False)
    print(city_list)


if __name__ == '__main__':
    main()

功能3.0 CSV读取

# -*- coding:utf-8 -*-
"""
author: Susan
function: CSV writing (export city AQI data read from JSON)
version: v3.0
"""
import json
import csv


def process_json_file(filepath):
    """Load a JSON file and return the parsed city list.

    Bug fix: close the file deterministically via a context manager
    (the original leaked the handle).
    """
    with open(filepath, mode='r', encoding='utf-8') as f:
        return json.load(f)


def main():
    """Read the city list from JSON, sort by AQI and export to aqi1.csv."""
    filepath = input('Please input a json filemane:')
    city_list = process_json_file(filepath)
    city_list.sort(key=lambda city: city['aqi'])
    # (The original also computed an unused top-5 slice; removed.)

    # Header row comes from the first record's keys; one row per city after.
    lines = [list(city_list[0].keys())]
    lines.extend(list(city.values()) for city in city_list)

    # newline='' stops the csv module from emitting a blank line after each row.
    with open('aqi1.csv', 'w', encoding='utf-8', newline='') as f:
        csv.writer(f).writerows(lines)


if __name__ == '__main__':
    main()

newline=''

指定 newline='' 时,csv 模块按自身规则处理换行;不指定则写出的每行之间会多出一个空行

根据输入的文件判断是JSON格式还是CSV格式,并进行相应的操作

功能4.0 判断文件格式

# -*- coding:utf-8 -*-
"""
author: Susan
function: judge the file format (JSON vs CSV) and display the content
version: v4.0
CSV: comma separated values
"""
import json
import csv
import os


def process_json_file(filepath):
    """Load a JSON file and print the parsed city list."""
    with open(filepath, mode='r', encoding='utf-8') as f:
        city_list = json.load(f)
    print(city_list)


def process_csv_file(filepath):
    """Print a CSV file row by row, joining each row's fields with commas."""
    with open(filepath, mode='r', encoding='utf-8', newline='') as f:
        reader = csv.reader(f)
        for row in reader:
            print(','.join(row))


def main():
    """Dispatch on the file extension to the matching reader."""
    filepath = input('Please input a filemane:')
    # splitext gives ('name', '.ext'); only the extension matters here.
    _, file_ext = os.path.splitext(filepath)
    if file_ext == '.json':
        process_json_file(filepath)
    elif file_ext == '.csv':
        process_csv_file(filepath)
    else:
        print('Unsupported file format!')


if __name__ == '__main__':
    main()

功能5.0 利用爬虫做实时计算

# -*- coding:utf-8 -*-
"""
author: Susan
function: use a crawler to fetch a city page and show its AQI in real time
version: v5.0
Access the webpage through the crawler and display it to the user.
"""
import requests


def get_html_text(url):
    """Fetch url and return the response body as text."""
    r = requests.get(url, timeout=30)
    print(r.status_code)
    return r.text


def main():
    """Ask for a city name in pinyin, scrape pm25.in and print its AQI."""
    city_pinyin = input('Please enter the city pinyin:')
    url = 'http://pm25.in/' + city_pinyin
    url_text = get_html_text(url)

    # The AQI value sits immediately after this HTML fragment on the page.
    aqi_div = ''' <div class="span12 data"><div class="span1"><div class="value">'''
    index = url_text.find(aqi_div)
    if index == -1:
        # Bug fix: str.find returns -1 when the marker is absent; the
        # original then sliced garbage from the page instead of reporting it.
        print('AQI marker not found in the page.')
        return
    begin_index = index + len(aqi_div)
    end_index = begin_index + 2  # assumes a two-digit AQI — TODO confirm
    aqi_val = url_text[begin_index:end_index]
    print('Air quality:{}'.format(aqi_val))


if __name__ == '__main__':
    main()

• 为了能有效地提取并利用网络信息、提高工作效率,出现了网络爬虫

• 利用网络爬虫实时获取城市的空气质量

• 高效地解析和处理HTML,beautifulsoup4

功能6.0 利用网络爬虫实时获取城市的空气质量

# -*- coding:utf-8 -*-
"""
author: Susan
function: parse the city page with BeautifulSoup and return its AQI records
version: v6.0
"""
import requests
from bs4 import BeautifulSoup


def get_city_aqi(city_pinyin):
    """Scrape pm25.in for one city and return [(caption, value), ...].

    Only the first 8 'span1' divs carry pollutant readings.
    Bug fix: the original indexed div_list[0..7] unconditionally and
    raised IndexError whenever the page had fewer matching divs.
    """
    url = 'http://pm25.in/' + city_pinyin
    r = requests.get(url, timeout=30)
    soup = BeautifulSoup(r.text, 'lxml')
    div_list = soup.find_all('div', {'class': 'span1'})

    city_aqi = []
    for div_content in div_list[:8]:
        caption = div_content.find('div', {'class': 'caption'}).text.strip()
        value = div_content.find('div', {'class': 'value'}).text.strip()
        city_aqi.append((caption, value))
    return city_aqi


def main():
    """Ask for a city in pinyin and print its pollutant readings."""
    city_pinyin = input('Please enter the city pinyin:')
    city_aqi = get_city_aqi(city_pinyin)
    print('Air quality:{}'.format(city_aqi))


if __name__ == '__main__':
    main()

Python strip() 方法用于移除字符串头尾指定的字符(默认为空格或换行符)或字符序列。

注意:该方法只能删除开头或是结尾的字符,不能删除中间部分的字符。

功能7.0 利用beautifulsoup4获取所有城市的空气质量

# -*- coding:utf-8 -*-
"""
author: Susan
function: get the AQI of every city listed on pm25.in with BeautifulSoup
version: v7.0
"""
import requests
from bs4 import BeautifulSoup


def get_city_aqi(city_pinyin):
    """Scrape pm25.in for one city and return [(caption, value), ...].

    Bug fix: slice instead of blind indexing (range(8)) so a page with
    fewer than 8 'span1' divs cannot raise IndexError.
    """
    url = 'http://pm25.in/' + city_pinyin
    r = requests.get(url, timeout=30)
    soup = BeautifulSoup(r.text, 'lxml')
    div_list = soup.find_all('div', {'class': 'span1'})

    city_aqi = []
    for div_content in div_list[:8]:
        caption = div_content.find('div', {'class': 'caption'}).text.strip()
        value = div_content.find('div', {'class': 'value'}).text.strip()
        city_aqi.append((caption, value))
    return city_aqi


def get_all_cities():
    """Scrape the pm25.in front page and return [(city_name, pinyin), ...]."""
    url = 'http://pm25.in/'
    city_list = []
    r = requests.get(url, timeout=30)
    soup = BeautifulSoup(r.text, 'lxml')
    # The second 'bottom' div on the page holds the complete city link list.
    city_div = soup.find_all('div', {'class': 'bottom'})[1]
    for city_link in city_div.find_all('a'):
        city_name = city_link.text
        city_pinyin = city_link['href'][1:]  # href is '/pinyin'; drop the '/'
        city_list.append((city_name, city_pinyin))
    return city_list


def main():
    """Print the pollutant readings of every listed city."""
    for city_name, city_pinyin in get_all_cities():
        print(city_name, get_city_aqi(city_pinyin))


if __name__ == '__main__':
    main()

功能8.0 将获取的所有城市空气质量保存成CSV数据文件

# -*- coding:utf-8 -*-
"""
author: Susan
function: 1. get the AQI of every city on pm25.in with BeautifulSoup
          2. save the real-time AQI of all cities to a CSV file
version: v8.0
"""
import requests
from bs4 import BeautifulSoup
import csv


def get_city_aqi(city_pinyin):
    """Scrape pm25.in for one city and return its 8 pollutant values.

    Bug fix: slice instead of blind indexing (range(8)) so a page with
    fewer than 8 'span1' divs cannot raise IndexError.
    """
    url = 'http://pm25.in/' + city_pinyin
    r = requests.get(url, timeout=30)
    soup = BeautifulSoup(r.text, 'lxml')
    div_list = soup.find_all('div', {'class': 'span1'})

    city_aqi = []
    for div_content in div_list[:8]:
        value = div_content.find('div', {'class': 'value'}).text.strip()
        city_aqi.append(value)
    return city_aqi


def get_all_cities():
    """Scrape the pm25.in front page and return [(city_name, pinyin), ...]."""
    url = 'http://pm25.in/'
    city_list = []
    r = requests.get(url, timeout=30)
    soup = BeautifulSoup(r.text, 'lxml')
    # The second 'bottom' div on the page holds the complete city link list.
    city_div = soup.find_all('div', {'class': 'bottom'})[1]
    for city_link in city_div.find_all('a'):
        city_name = city_link.text
        city_pinyin = city_link['href'][1:]  # href is '/pinyin'; drop the '/'
        city_list.append((city_name, city_pinyin))
    return city_list


def main():
    """Save every city's current pollutant readings to China_city_aqi.csv."""
    city_list = get_all_cities()
    # Bug fix: the ozone column was spelled '03/8h' (zero-three); it is O3.
    header = ['city', 'AQI', 'PM2.5/1h', 'PM10/1h', 'CO/1h', 'NO2/1h', 'O3/8h', 'SO2/1h']
    with open('China_city_aqi.csv', 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        for i, (city_name, city_pinyin) in enumerate(city_list):
            if (i + 1) % 10 == 0:
                # Progress report every 10 cities.
                print('Several records have been processed:{},a total of several records:{}.'.format(i + 1, len(city_list)))
            writer.writerow([city_name] + get_city_aqi(city_pinyin))


if __name__ == '__main__':
    main()

什么是Pandas

Pandas的数据结构

Pandas的数据操作

Pandas统计计算和描述

功能9.0 简单的数据处理和分析

结构化数据:CSV,JSON

非结构化数据:视频,图片,声音

aqi_data.sort_values(by=['AQI'])  # 默认从小到大(升序)

aqi_data.sort_values(by=['AQI'], ascending=False)  # 从大到小(降序)

# -*- coding:utf-8 -*-
"""
author: Susan
function: simple analysis of the saved city AQI data with pandas
version: v9.0
"""
import pandas as pd


def main():
    """Print basic AQI statistics and save the 10 best / worst cities."""
    aqi_data = pd.read_csv('China_city_aqi.csv')

    print('Basic Information:')
    print(aqi_data.info())
    print('Data preview:')
    print(aqi_data.head())

    # Basic statistics.
    # Bug fix: the original printed the bound method (aqi_data['AQI'].max,
    # no call parentheses) and also used .max where the minimum was meant.
    print('AQI max:', aqi_data['AQI'].max())
    print('AQI min:', aqi_data['AQI'].min())
    print('AQI mean:', aqi_data['AQI'].mean())

    # 10 cities with the lowest AQI (best air quality).
    top10_cities = aqi_data.sort_values(by=['AQI']).head(10)
    print('Ten cities with the best air quality:')
    print(top10_cities)

    # 10 cities with the highest AQI (worst air quality).
    bottom_cities = aqi_data.sort_values(by=['AQI']).tail(10)
    print('Ten cities with the worst air quality:')
    print(bottom_cities)

    # Persist both rankings as CSV.
    top10_cities.to_csv('top10_aqi.csv')
    bottom_cities.to_csv('bottom10_aqi.csv')


if __name__ == '__main__':
    main()

功能10.0 数据清洗和可视化

• 数据清洗;利用Pandas进行数据可视化

数据获取(网络爬虫)--->数据清洗(只保留AQI>0的数据)

plot(kind, x, y, title, figsize) #kind指定绘制图像类型

/claroja/article/details/73872066 plot属性设置

/p/33f843a7cef5 plot教程

/qq_37904945/article/details/79818719 无法显示中文字体的问题

终端输入 fc-list :lang=zh 可查看系统中中文字体所在的位置;在 Python 中用绝对路径来引用字体:

import matplotlib.pyplot as plt

import matplotlib as mpl

zhfont= mpl.font_manager.FontProperties(fname='/usr/share/fonts/truetype/arphic/ukai.ttc')

plt.plot([1, 2, 3])

plt.xlabel('x轴标签', fontproperties=zhfont)

plt.ylabel('y轴标签',fontproperties=zhfont)

plt.show()

# -*- coding:utf-8 -*-
"""
author: Susan
function: clean the city AQI data (keep AQI > 0) and plot the 50 best cities
version: v10.0
"""
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl


def main():
    """Clean the AQI data, print statistics and plot the top-50 bar chart."""
    aqi_data = pd.read_csv('China_city_aqi.csv')

    print('Basic Information:')
    print(aqi_data.info())
    print('Data preview:')
    print(aqi_data.head())

    # Data cleaning: keep only rows with a positive AQI reading.
    clean_aqi_data = aqi_data[aqi_data['AQI'] > 0]

    # Basic statistics.
    # Bug fix: .max/.min must be called; the original printed the bound
    # method object and used .max for the minimum as well.
    print('AQI max:', clean_aqi_data['AQI'].max())
    print('AQI min:', clean_aqi_data['AQI'].min())
    print('AQI mean:', clean_aqi_data['AQI'].mean())

    # A CJK font is needed to render the Chinese axis labels on Linux.
    # NOTE(review): this absolute font path is machine-specific — confirm
    # it exists, or substitute another installed CJK font.
    font = mpl.font_manager.FontProperties(fname='/usr/share/fonts/opentype/noto/NotoSansCJK-Bold.ttc')

    top50_cities = clean_aqi_data.sort_values(by=['AQI']).head(50)
    # Bug fix: the CSV column written by the crawler script is 'city'
    # (lower case); x='City' raised a KeyError.
    top50_cities.plot(kind='bar', x='city', y='AQI',
                      title='Fifty cities with the best air quality',
                      figsize=(20, 10))
    plt.xticks(fontproperties=font)
    plt.xlabel(u"城市", fontproperties=font)
    # Bug fix: the original called plt.xlabel twice; the second call is
    # the y-axis label.
    plt.ylabel(u"空气质量", fontproperties=font)
    plt.savefig('Top50_api.png')
    plt.show()
    top50_cities.to_csv('top50_aqi.csv')


if __name__ == '__main__':
    main()

如果觉得《python小象学院: JSON文件 /网络爬虫/Pandas-----空气质量描述》对你有帮助,请点赞、收藏,并留下你的观点哦!

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。