@fanxy 2020-03-15T08:52:43.000000Z 字数 8850 阅读 5069

# 第三讲 数据可视化与编程基础

樊潇彦 复旦大学经济学院 金融数据

# 0. 准备工作

# Session setup for this lecture.
# NOTE: the original snippet was collapsed onto a single line, so everything
# after the first `#` was parsed as a comment and never executed. The
# statements are restored one per line here.

# Set the working directory. Use "/" or an escaped "\\" as the path separator.
# The path below is a placeholder; replace it with your own course folder.
setwd("D:\\...\\Ch03")

# Clear all objects from the global environment (this does not detach packages).
rm(list = ls())

## Load packages that were installed in earlier lectures.
library(tidyverse)
library(readstata13)
library(haven)
library(readxl)

## Install (only if missing) and load the packages used in this lecture.
lecture_pkgs <- c("ggplot2", "ggvis", "shiny", "dygraphs")
is_installed <- vapply(lecture_pkgs, requireNamespace, logical(1), quietly = TRUE)
if (!all(is_installed)) {
  # Installing only the missing packages avoids a full re-download on every run.
  install.packages(lecture_pkgs[!is_installed])
}
library(ggplot2)
library(ggvis)
library(shiny)
library(dygraphs)

# 1. 数据可视化

## 1.5 dygraphs

# Interactive time-series charts with the dygraphs package.
# NOTE: the original snippet was collapsed onto two physical lines, so every
# example after the first `#` was parsed as a comment. The statements are
# restored one per line; the calls themselves are unchanged.
library(dygraphs)

# dyAnnotation: attach labelled markers to individual points.
dygraph(presidents, main = "Presidential Approval") %>%
  dyAxis("y", valueRange = c(0, 100)) %>%
  dyAnnotation("1950-7-1", text = "A", tooltip = "Korea") %>%
  dyAnnotation("1965-1-1", text = "B", tooltip = "Vietnam")

# dyAxis: axis label, range and line options.
dygraph(nhtemp, main = "New Haven Temperatures") %>%
  dyAxis("y", label = "Temp (F)", valueRange = c(40, 60)) %>%
  dyOptions(axisLineWidth = 1.5, fillGraph = TRUE, drawGrid = FALSE)

# dyEvent: vertical event lines.
dygraph(presidents, main = "Presidential Approval") %>%
  dyAxis("y", valueRange = c(0, 100)) %>%
  dyEvent("1950-6-30", "Korea", labelLoc = "bottom") %>%
  dyEvent("1965-2-09", "Vietnam", labelLoc = "bottom")

# dyHighlight: emphasise the series under the mouse pointer.
lungDeaths <- cbind(ldeaths, mdeaths, fdeaths)
dygraph(lungDeaths, main = "Deaths from Lung Disease (UK)") %>%
  dyHighlight(
    highlightCircleSize = 5,
    highlightSeriesBackgroundAlpha = 0.2,
    hideOnMouseOut = FALSE
  )

# dyLegend: legend display options.
dygraph(nhtemp, main = "New Haven Temperatures") %>%
  dySeries("V1", label = "Temperature (F)") %>%
  dyLegend(show = "always", hideOnMouseOut = FALSE)

# dyLimit: horizontal limit line (here at the series maximum).
dygraph(presidents, main = "Presidential Approval") %>%
  dyAxis("y", valueRange = c(0, 100)) %>%
  dyLimit(
    max(presidents, na.rm = TRUE), "Max",
    strokePattern = "solid", color = "blue"
  )

# dyOptions: general chart options.
dygraph(lungDeaths) %>%
  dyRangeSelector()

dygraph(lungDeaths) %>%
  dySeries("mdeaths", label = "Male") %>%
  dySeries("fdeaths", label = "Female") %>%
  dyOptions(stackedGraph = TRUE) %>%
  dyRangeSelector(height = 20)

# Holt-Winters forecast: plot the fit together with its lower/upper bounds.
hw <- HoltWinters(ldeaths)
predicted <- predict(hw, n.ahead = 72, prediction.interval = TRUE)
dygraph(predicted, main = "Predicted Lung Deaths (UK)") %>%
  dyAxis("x", drawGrid = FALSE) %>%
  dySeries(c("lwr", "fit", "upr"), label = "Deaths") %>%
  dyOptions(colors = RColorBrewer::brewer.pal(3, "Set1"))

# dyRangeSelector: interactive date-range selector below the chart.
dygraph(nhtemp, main = "New Haven Temperatures") %>%
  dyRangeSelector()

dygraph(nhtemp, main = "New Haven Temperatures") %>%
  dyRangeSelector(dateWindow = c("1920-01-01", "1960-01-01"))

dygraph(nhtemp, main = "New Haven Temperatures") %>%
  dyRangeSelector(height = 20, strokeColor = "")

# dyRoller: rolling-average smoothing.
# Y values are averaged over the specified number of time scale units.
dygraph(discoveries, main = "Important Discoveries") %>%
  dyRoller(rollPeriod = 5)

# dyShading: shaded regions, along the x axis (default) or the y axis.
dygraph(nhtemp, main = "New Haven Temperatures") %>%
  dyShading(from = "1920-1-1", to = "1930-1-1") %>%
  dyShading(from = "1940-1-1", to = "1950-1-1")

dygraph(nhtemp, main = "New Haven Temperatures") %>%
  dyShading(from = "48", to = "52", axis = "y") %>%
  dyShading(from = "50", to = "50.1", axis = "y", color = "black")

# 2. 编程基础

## 2.1 通过赋值生成一个新变量

# Creating variables by assignment.
# NOTE: the original snippet was collapsed onto a single line, so everything
# after the first `#` was parsed as a comment. Statements are restored one
# per line.

x <- 1.5
cat("x = ", x, "\n", sep = "")  # print to the console; also handy for debugging

y1 <- c(1.5, 2.3, 8.6, 7.4, 9.2)                # numeric vector
y2 <- c("MSFT", "GOOG", "AAPL")                 # character vector
# Use TRUE/FALSE rather than T/F: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
y3 <- c(TRUE, FALSE, TRUE, TRUE, FALSE, FALSE)  # logical vector

3.1415926 -> z       # value on the left, name on the right; legal but rarely used
assign("t", 1.414)   # assign() binds a value to a name given as a string

## 2.2 分支结构：if, if-else

# Branching with if and if-else.
# NOTE: the original snippet was collapsed onto a single line beginning with
# `#`, so none of it executed. Statements are restored one per line.

# Plain if: the body runs only when the condition is TRUE.
a <- 1
if (a == 1) print("a == 1")

# if-else written on a single line.
a <- 2
if (a > 1) print("a > 1") else print("a <= 1")

# Multi-statement branches must be wrapped in {}. At top level, `else` has to
# follow the closing `}` on the same line; otherwise R treats the `if` as
# complete and fails on the dangling `else`.
a <- 3
if (a == 1) {
  a                      # a bare name inside a block is not auto-printed
  print("I am a boy!")
} else {
  print(a)               # print() displays the value explicitly
  print("I am a girl!")
}

## 2.3 多重分支结构：if, ifelse, switch

# Multi-way branching: if / else if, ifelse(), switch().
# NOTE: the original snippet was collapsed onto a single line beginning with
# `#`, so none of it executed. Statements are restored one per line.

# 1) if - else if chain. As with a plain if-else, every `else` must sit on the
#    same line as the preceding closing `}`.
a <- 4
if (a == 1) {
  print("a == 1")
} else if (a == 2) {
  print("a == 2")
} else {
  print("Not 1 & 2")
}

# 2) ifelse(test, yes, no): evaluates the logical test and returns the second
#    argument where it is TRUE, otherwise the third.
a <- 2
ifelse(a > 1, 3.1416, 1.414)

# 3) switch() with a numeric selector: evaluates the a-th alternative
#    (here the second one, because a is 2).
switch(a,
  print("选项1"),
  print("选项2"),
  print("选项3"),
  print("选项4"),
  print("选项5")
)

## 2.4 循环结构: for, while, repeat

# Loops: for, while, repeat.
# NOTE: the original snippet was collapsed onto a single line beginning with
# `#`, so none of it executed. Statements are restored one per line.

# 1) for: `in` enumerates every element of a vector.
iTotal <- 0
for (i in 1:100) {
  iTotal <- iTotal + i
}
cat("1-100的累加和为：", iTotal, "\n", sep = "")

# A for loop can enumerate character vectors as well.
szSymbols <- c("MSFT", "GOOG", "AAPL", "INTL", "ORCL", "SYMC")
for (SymbolName in szSymbols) {
  cat(SymbolName, "\n", sep = "")
}

# 2) while: the condition is tested before each iteration.
i <- 1
iTotal <- 0
while (i <= 100) {
  iTotal <- iTotal + i
  i <- i + 1
}
cat("1-100的累加和为：", iTotal, "\n", sep = "")  # show the result on screen

# 3) repeat: an unconditional loop; the body must exit explicitly.
i <- 1
iTotal <- 0
repeat {
  iTotal <- iTotal + i
  i <- i + 1
  # `next` jumps to the next iteration, `break` leaves the loop.
  if (i <= 100) next else break
}
cat("1-100的累加和为：", iTotal, "\n", sep = "")

## 2.5 自定义函数 function

# User-defined functions.
# NOTE: the original snippet was collapsed onto a single line beginning with
# `#`, so none of it executed. Statements are restored one per line.

# Small functions can be defined inline and called directly.

# Raise a square matrix to a non-negative integer power.
#
# A: a square matrix (anything with two equal dimensions that supports %*%).
# n: a single non-negative whole number, the exponent.
# Returns A multiplied by itself n times; the identity matrix when n is 0.
# Stops with an error if A is not square or n is not a non-negative integer.
mat_power <- function(A, n) {
  dims <- dim(A)
  stopifnot(
    length(dims) == 2L, dims[1] == dims[2],
    is.numeric(n), length(n) == 1L, !is.na(n), n >= 0, n == round(n)
  )
  if (n == 0) {
    return(diag(dims[1]))
  }
  Apower <- A
  # seq_len(n - 1) is empty when n is 1. The earlier `2:n` counted down
  # (2, 1) in that case and multiplied twice, returning A^3 instead of A.
  for (i in seq_len(n - 1)) {
    Apower <- Apower %*% A
  }
  Apower
}

A <- matrix(1:4, 2)
mat_power(A, 3)
A %*% A %*% A      # same result, written out by hand

# Larger functions are better kept in their own .r file and loaded with
# source(). The guard keeps the inline definition when the file is absent.
if (file.exists("myfun.r")) {
  rm("mat_power")
  source("myfun.r")  # load the user-written script
  mat_power(A, 3)
} else {
  message("myfun.r not found in the working directory; keeping the inline mat_power().")
}

## 2.6 获取帮助信息

# Getting help.
# NOTE: the original snippet was collapsed onto a single line, so everything
# after the first `#` was parsed as a comment. Statements are restored one
# per line.

?print             # open the help page for print() (right-hand pane in RStudio)
example(print)     # run the examples from the help page
?quantmod          # package-level help; needs the quantmod package to be installed
# apropos() treats its argument as a regular expression, not a glob. The
# earlier pattern "print*" meant "prin" followed by zero or more "t", so it
# matched any name containing "prin". "print" lists the names containing "print".
apropos("print")
demo(graphics)     # run the built-in graphics demonstration

# If you are unsure of the exact name of a package or function, search
# Google or http://rseek.org/.
1. R.I. Kabacoff著：《R语言实战（第2版）》，王小宁、刘撷芯、黄俊文译，人民邮电出版社，2016
2. RStudio: ggplot2 Cheat Sheet
3. Roger D. Peng：Exploratory Data Analysis, Lecture Notes
4. The DataCamp Team: Questions All R Users Have About Plots

• 私有
• 公开
• 删除