1 引言
介绍如何利用R进行空间数据分析。
同样地,本练习将以空气质量数据获取分析为例,介绍R在空间数据分析中的应用。
2 空气污染制图
初始化
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 2.2.1 √ purrr 0.2.4
## √ tibble 1.4.2 √ dplyr 0.7.4
## √ tidyr 0.8.0 √ stringr 1.2.0
## √ readr 1.1.1 √ forcats 0.3.0
## -- Conflicts ------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(sf)
## Linking to GEOS 3.6.1, GDAL 2.2.3, proj.4 4.9.3
library(ggmap)
library(automap)
## Loading required package: sp
2.1 读取和预览数据
df <- read_csv("aqi.csv")
## Parsed with column specification:
## cols(
## lat = col_double(),
## lon = col_double(),
## city = col_character(),
## idx = col_integer(),
## stamp = col_integer(),
## pol = col_character(),
## x = col_character(),
## aqi = col_character(),
## tz = col_character(),
## utime = col_datetime(format = ""),
## img = col_character()
## )
head(df)
## # A tibble: 6 x 11
## lat lon city idx stamp pol x aqi tz
## <dbl> <dbl> <chr> <int> <int> <chr> <chr> <chr> <chr>
## 1 25.7 100. Dali City Environmenta~ 1387 1.52e9 pm25 7223 89 +0800
## 2 20.0 79.3 Chandrapur, Chandrapur~ 2813 1.52e9 pm25 8682 80 +0530
## 3 35.2 128. Sangdae-dong, Jinju-si~ 1778 1.52e9 pm25 4015 78 +0900
## 4 47.4 124. Anju Compound, Qiqihae~ 1144 1.52e9 pm25 3631 61 +0800
## 5 39.5 76.0 Patrol unit, Kashi (喀什~ 1377 1.52e9 pm25 7239 57 +0800
## 6 28.1 120. 青田二中, Qīngtián, Lishui~ 915 1.52e9 pm25 4830 155 +0800
## # ... with 2 more variables: utime <dttm>, img <chr>
df$aqi <- as.numeric(df$aqi)
hist(df$aqi, breaks = 30)
ggplot(df, aes(x = aqi)) +
geom_histogram() +
geom_density()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
2.2 清洗数据
利用dplyr
, tidyr
来进行数据的清晰,并学习使用管道 %>%
df <- df %>%
filter(!is.na(aqi)) %>%
select(-img) %>%
select(lon, lat, aqi) %>%
mutate(aqi = as.numeric(aqi))
ggplot(df, aes(x = lon, y = lat)) +
geom_point(aes(colour = aqi))
library(RColorBrewer)
display.brewer.all()
2.3 空间可视化
sp <- df
coordinates(sp) <- ~ lon + lat
proj4string(sp) <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
str(sp)
## Formal class 'SpatialPointsDataFrame' [package "sp"] with 5 slots
## ..@ data :Classes 'tbl_df', 'tbl' and 'data.frame': 448 obs. of 1 variable:
## .. ..$ aqi: num [1:448] 89 80 78 61 57 155 154 147 53 50 ...
## ..@ coords.nrs : int [1:2] 1 2
## ..@ coords : num [1:448, 1:2] 100.2 79.3 128.1 123.9 76 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:448] "1" "2" "3" "4" ...
## .. .. ..$ : chr [1:2] "lon" "lat"
## ..@ bbox : num [1:2, 1:2] 72.88 7.88 139.22 50.43
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:2] "lon" "lat"
## .. .. ..$ : chr [1:2] "min" "max"
## ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
## .. .. ..@ projargs: chr "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs +towgs84=0,0,0"
spplot(sp)
sf <- st_as_sf(sp)
plot(sf, graticule = TRUE, axes = TRUE)
terrain <-
get_map(
location = c(lon = 105, lat = 39),
zoom = 4,
maptype = "terrain",
source = "google",
messaging = FALSE
)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=39,105&zoom=4&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
colours = c( "#009966", "#ffde33", "#ff9933", "#cc0033", "#660099", "#7e0023")
ggmap(terrain) +
geom_point(data = df, aes(x = lon, y = lat, colour = aqi)) +
scale_colour_gradientn(colours = colours)
library(leaflet)
pal <- colorQuantile("YlOrRd", NULL, n=5)
leaflet(df) %>% addTiles()%>%
addCircleMarkers(~lon, ~lat , popup=df$aqi,
radius = log2(df$aqi),
stroke = FALSE, fillOpacity = 0.8,
color= ~pal(df$aqi))
2.4 空间插值
library(raster)
##
## Attaching package: 'raster'
## The following object is masked from 'package:dplyr':
##
## select
## The following object is masked from 'package:tidyr':
##
## extract
library(rasterVis)
## Loading required package: lattice
## Loading required package: latticeExtra
##
## Attaching package: 'latticeExtra'
## The following object is masked from 'package:ggplot2':
##
## layer
library(rgdal)
## rgdal: version: 1.2-18, (SVN revision 718)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
## Path to GDAL shared files: C:/Users/hello/Documents/R/win-library/3.4/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: C:/Users/hello/Documents/R/win-library/3.4/rgdal/proj
## Linking to sp version: 1.2-7
r <- raster("pop_10km.tif")
r
## class : RasterLayer
## dimensions : 406, 484, 196504 (nrow, ncol, ncell)
## resolution : 10000, 10000 (x, y)
## extent : -2630000, 2210000, 1870000, 5930000 (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=aea +lat_1=25 +lat_2=47 +lat_0=0 +lon_0=105 +x_0=0 +y_0=0 +ellps=krass +units=m +no_defs
## data source : D:\Course\ucasgisapp\pop_10km.tif
## names : pop_10km
## values : 0, 1852488 (min, max)
plot(r)
levelplot(r %>% log())
grid <- as(r, "SpatialGridDataFrame")
obs <- spTransform(x = sp, CRSobj = grid@proj4string)
library(automap)
pm25 <- autoKrige(aqi ~ 1, obs, grid,
remove_duplicates = T, debug.level = 0)
pm25.cv <- autoKrige.cv(aqi ~ 1, obs, nfold = 10)
##
|
| | 0%
|
|======= | 11%
|
|============== | 22%
|
|====================== | 33%
|
|============================= | 44%
|
|==================================== | 56%
|
|=========================================== | 67%
|
|=================================================== | 78%
|
|========================================================== | 89%
|
|=================================================================| 100%
ggplot(as.data.frame(pm25.cv$krige.cv_output), aes(x = var1.pred, y = observed)) +
geom_point() +
geom_smooth(method = lm) +
geom_abline(intercept = 0, slope = 1, colour = "red") +
xlim(0, 300) +
ylim(0, 300)
lm(pm25.cv$krige.cv_output$var1.pred ~ pm25.cv$krige.cv_output$observed - 1) %>% summary()
##
## Call:
## lm(formula = pm25.cv$krige.cv_output$var1.pred ~ pm25.cv$krige.cv_output$observed -
## 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -523.17 14.91 30.48 45.32 265.83
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## pm25.cv$krige.cv_output$observed 0.73374 0.02084 35.21 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 62.56 on 447 degrees of freedom
## Multiple R-squared: 0.735, Adjusted R-squared: 0.7344
## F-statistic: 1240 on 1 and 447 DF, p-value: < 2.2e-16
plot(pm25)