@fanxy
2016-07-12T10:51:46.000000Z
字数 2871
阅读 2425
樊潇彦 复旦大学经济学院 经济数学
四种前缀(以正态分布 为例):
dnorm(x,mu,sigma)pnorm(x,mu,sigma)qnorm(1-alpha/2,mu,sigma)rnorm(N,mu,sigma)
x <- seq(from = -5, to = 5, by = 0.01)y <- dnorm(x)plot(x=x,y=y,type="l",col="seagreen",lwd=2,xlab="x",ylab="density\ny = dnorm(x)")grid(col="darkgrey",lwd=2)title(main="概率分布(PDF)")y <- pnorm(x)plot(x=x,y=y,type="l",col="seagreen",lwd=2, xlab="x = qnorm(y)",ylab="probability\ny = pnorm(x)") ; grid(col="darkgrey",lwd=2)title(main="累积分布(CDF)")set.seed(1)x <- rnorm(100, sd=3) # 模拟生成单变量y <- rnorm(100, mean=1) + xmn = apply(cbind(x,y), 2, mean) # 计算x和y的均值covmat= cov(cbind(x,y)) # 计算x和y的协方差library(mnormt) # 调用生成多元随机变量的包simdata=rmnorm(300,mean=mn,varcov=covmat) # 模拟生成双变量library(ggplot2)ggplot(as.data.frame(simdata), aes(x=x, y=y)) +geom_point() +geom_smooth(method="lm") +theme_bw()

library(MASS)data(Insurance) # 调用保险数据str(Insurance) # 查看数据结构attach(Insurance) # 绑定数据
# 1)连续变量mean(Holders) # 基本统计量,不能有缺失值# mean(Holders, na.rm = TRUE) # 有缺失值时用median(Holders)sd(Holders)var(Holders)quantile(Holders) # 四分位值quantile(Holders,seq(0,1,0.1)) # 十分位值library(Hmisc)describe(data.frame(District, Group, Age)) # 简单数据描述describe(data.frame(Holders, Claims))library(fBasics)basicStats(data.frame(Holders, Claims)) # 更多统计量# 2)离散变量table(Age) # 离散变量频率表
# 1)两个连续变量cor(data.frame(Holders, Claims),use="pairwise",method="pearson") # 相关系数cov(data.frame(Holders, Claims),use="pairwise",method="pearson") # 协方差矩阵# 2)一个连续变量和一个离散变量by(data.frame(Holders,Claims),Age,summary) # 按离散变量分组,对连续变量做统计描述# 3)两个离散变量table(District,Age) # 两个离散变量频率表detach(Insurance) # 解除绑定
t.text():单变量 ,双变量 var.text():单变量 ,双变量 wilcox.test()normalTest()chisq.test()ks.test()
# 以Tsay(2013)中3M公司股票对数收益率为例url="https://faculty.chicagobooth.edu/ruey.tsay/teaching/introTS/ch1data.zip"download.file(url, "ch1data.zip") # 下载存为同名文件x=read.table(unz("ch1data.zip","d-mmm-0111.txt"),header=T) # 读入数据x=x[,2] # 去掉日期# 1) 作图hist(x,nclass=30) # 直方图d1=density(x,na.rm=T) # 密度plot(d1$x,d1$y,xlab='',ylab='',type='l',main="3M公司股票对数收益率密度函数") # 画密度线rangex=range(x,na.rm=T) # 取值范围seqx=seq(rangex[1],rangex[2],.001) # x轴间隔0.001y1=dnorm(seqx,mean(x,na.rm=T),stdev(x,na.rm=T)) # 正态分布lines(seqx,y1,lty=2) # 添加正态分布线# 2) 检验basicStats(x)mean(x,na.rm=TRUE); var(x,na.rm=TRUE); stdev(x,na.rm=TRUE);t.test(x) # 均值检验 H0: x=0normalTest(x,method='jb',na.rm=T) # 正态分布检验,JB-tests3=skewness(x,na.rm =T)T=length(x)t3=s3/sqrt(6/T)pp=2*(1-pnorm(t3)) # 偏度检验(Skewness test)s4=kurtosis(x,na.rm =T)t4=s4/sqrt(24/T)pp=2*(1-pnorm(t4)) # 峰度检验(Kurtosis test)
