R语言抓取pm2.5数据绘制全国pm2.5分布图

  • Post author:
  • Post category:其他


library(rworldmap)

library(XML)

###Set date like yyyy-mm-dd

StartDate = format(Sys.time() – 86400 ,”%Y-%m-%d”)

EndDate = format(Sys.time() – 86400 ,”%Y-%m-%d”)

Start = Sys.time()

cat(“Obtaining data …\n”)

website = paste(“http://datacenter.mep.gov.cn/report/air_daily/air_dairy.jsp?city=&startdate=”,StartDate,”&enddate=”,EndDate,”&page=”,1,sep = “”)

pmweb = readLines(website,encoding=”UTF-8″)

pmweb = paste(pmweb,collapse = “”)

pmhtml = htmlParse(pmweb,encoding=”UTF-8″)

totalPages = as.numeric(xmlValue(getNodeSet(pmhtml,”//font[@color=’#004e98′]”)[[2]]))

cat(“\tThere are”,totalPages-1,”pages need to be downloaded …\n”)

pmtotal = getNodeSet(pmhtml,”//table/tr/td”)

pmvalue = sapply(X=pmtotal,FUN=xmlValue)

pmvalue = pmvalue[-1:(-match(x = “首要污染物”,table = pmvalue))]

pmvalue = pmvalue[1:((1:length(pmvalue))[grepl(pattern = “记录总数”,x = pmvalue)]-1)]

pm = matrix(pmvalue,byrow = T,ncol = 6)

pm = data.frame(city=pm[,2],date=pm[,3],pm=as.numeric(pm[,4]),type=pm[,6],degree=pm[,5])

if(totalPages>=2)

{

for(page in 2:totalPages )

{

cat(“Obtaining data for page”,page,”…\n”)

website = paste(“http://datacenter.mep.gov.cn/report/air_daily/air_dairy.jsp?city=&startdate=”,StartDate,”&enddate=”,EndDate,”&page=”,page,sep = “”)

pmweb = readLines(website,encoding=”UTF-8″)

pmweb = paste(pmweb,collapse = “”)

pmhtml = htmlParse(pmweb,encoding=”UTF-8″)

pmtotal = getNodeSet(pmhtml,”//table/tr/td”)

pmvalue = sapply(X=pmtotal,FUN=xmlValue)

pmvalue = pmvalue[-1:(-match(x = “首要污染物”,table = pmvalue))]

pmvalue = pmvalue[1:((1:length(pmvalue))[grepl(pattern = “记录总数”,x = pmvalue)]-1)]

pm.new = matrix(pmvalue,byrow = T,ncol = 6)

pm.new = data.frame(city=pm.new[,2],date=pm.new[,3],pm=as.numeric(pm.new[,4]),type=pm.new[,6],degree=pm.new[,5])

pm = rbind(pm,pm.new)

}

}

if(version$os==”mingw32″)

{

ChinaLocation = read.csv(file = “China.Cities.Location.Win.csv”)

}else

ChinaLocation = read.csv(file = “China.Cities.Location.Linux.csv”)

pmCityLocation = ChinaLocation[match(x = pm[,1],table = ChinaLocation[,1]),]

pmCity = cbind(pm,pmCityLocation)

pmCity = pmCity[complete.cases(pmCity),]

pm.max = max(pmCity$pm)

red = seq(from=0,to=1,length.out=pm.max)

green = seq(from=1,to=0,length.out=pm.max)

blue = seq(from=0,to=0,length.out=pm.max)

col = rgb(red=red,green=green,blue=blue,alpha=0.5)

cex = pmCity$pm/100 + 5

pmCity = data.frame(pmCity,color=col[pmCity$pm])

mapBubbles(dF = pmCity,

oceanCol = “skyblue”,

borderCol = “grey”,

landCol = “lightgreen”,

nameX = “GPSLontitude”,

nameY = “GPSLatitude”,

nameZSize = “pm”,

mapRegion = “China”,

nameZColour = “color”,

addColourLegend = F)

title(main=paste(StartDate,”-“,EndDate,”全国”,nrow(pmCity),”个主要城市PM2.5分布情况”,sep=””))

title(sub=paste(“数据来源:”,”中华人民共和国环境保护部数据中心 drawed by R author:Conda”,sep=””),col.sub=”grey”)

Stop = Sys.time()

Cost = Stop – Start

cat(paste(“\n\n\nIt cost”,Cost,”minutes.\n”))

转载自 http://blog.sina.com.cn/s/blog_403aa80a0101gp11.html