@fanxy
2020-03-25T12:19:10.000000Z
字数 8813
阅读 1534
樊潇彦 复旦大学经济学院 金融数据
下载数据:Ex1.rar
setwd("D:\\...\\课堂练习1")rm(list=ls())library(tidyverse)library(readxl) # 读取excel数据library(stringr) # 字符串处理library(ggplot2)library(zoo)
CAMP模型设定为:
在回归检验之前,我们要将 camp.xlsx 中的数据合并为一张数据表,其中:
rf=read_excel('camp.xlsx',sheet="无风险利率")str(rf)rf=rf%>%mutate(Trdmnt=as.yearmon(Clsdt))%>%group_by(Trdmnt)%>%summarise(rf=mean(Nrrmtdt))%>%group_by() # 月度均值rm=read_excel("camp.xlsx",sheet="市场收益率")str(rm)rm_rf=rm%>%mutate(Trdmnt=as.yearmon(Trdmnt))%>%left_join(rf,by="Trdmnt")%>% # 合并数据mutate(id=paste(Trdmnt,Markettype,sep="-")) # 标识每个市场每个月的回报率r=read_excel("camp.xlsx",sheet="个股收益率")str(r)capm=r%>%mutate(Trdmnt=as.yearmon(Trdmnt),id=paste(Trdmnt,Markettype,sep="-"))%>%select(-Trdmnt,-Markettype)%>%left_join(rm_rf,by="id")%>% # 个股所在市场该月的回报率mutate(r_ex=Mretwd-rf, # 计算个股超额回报rm_exeq=Mretwdeq-rf, # 分别计算三种市场超额回报rm_exos=Mretwdos-rf,rm_extl=Mretwdtl-rf)%>%select(Trdmnt,Stkcd,Markettype,contains("_ex"))str(capm)ggplot(capm%>%mutate(Trdmnt=as.Date(Trdmnt)),aes(Trdmnt,r_ex,color="r_ex"))+geom_line()+geom_line(aes(Trdmnt,rm_exos,color="rm_exos"))+facet_wrap(~Stkcd)+theme_bw()
打开联合国数据 gdp.xlsx,包括以下指标:
我们要分析中国与日、法、德、英、美相比,在过去48年中的GDP表现,需要作图的指标包括:
1. GDP总量(单位:万亿美元);
2. 净出口额(单位:亿美元);
3. 广义第二产业(采矿、制造和公用事业)占GDP的比重;
4. GDP占全球的份额。
gdp=read_excel("gdp.xlsx",skip=2)country=data.frame(CountryID=c(156,392,250,276,826,840),cou_cn=c("1 中国","3 日本","5 法国","4 德国","6 英国","2 美国"),stringsAsFactors = F)gdp=gdp%>%right_join(country,by="CountryID")%>%filter(IndicatorName %in% c("Gross Domestic Product (GDP)","Exports of goods and services","Imports of goods and services","Mining, Manufacturing, Utilities (ISIC C-E)"))%>%select(cou_cn,IndicatorName,'1970':'2018')%>%gather(year,value,-IndicatorName,-cou_cn)%>%spread(IndicatorName,value)%>%rename(GDP='Gross Domestic Product (GDP)',EX='Exports of goods and services',IM='Imports of goods and services',MAN='Mining, Manufacturing, Utilities (ISIC C-E)')%>%mutate(year=as.numeric(year),'GDP(单位:万亿美元)'=GDP/10^12,'净出口额(单位:亿美元)'=(EX-IM)/10^8,'采矿、制造和公用事业占GDP的比重'=MAN/GDP)%>%group_by(year)%>%mutate('GDP的全球份额'=GDP/sum(GDP))%>%group_by()%>%select(cou_cn,year,'GDP(单位:万亿美元)','净出口额(单位:亿美元)','采矿、制造和公用事业占GDP的比重','GDP的全球份额')%>%gather(var,value,-year,-cou_cn)# 作图ggplot(gdp,aes(year,value,color=cou_cn))+geom_line(size=1)+facet_wrap(~var,scales="free")+labs(title="",x="",y="")+guides(linetype=guide_legend(NULL))+geom_vline(xintercept=c(1978,1992,2001,2008),linetype=2, color="darkgrey")+scale_x_continuous(breaks = seq(1970,2018,6))+guides(color=guide_legend(title=NULL))+theme_bw()+theme(legend.position="bottom",legend.text=element_text(size=11),axis.text=element_text(size=9))
i=read.csv("WEBSTATS_CBPOL_M_DATAFLOW_csv_col.csv",stringsAsFactors = F)coulist=data.frame(Reference.area=c("China","Japan","Euro area","United Kingdom","United States"),cou_cn=c("1 中国","2 日本","3 德国","4 英国", "5 美国"))library(lubridate)i5=i%>%select(-FREQ, -Frequency, -Time.Period)%>%gather(time,i,-REF_AREA,-Reference.area, na.rm=T)%>%mutate(time=gsub("X","",time),time=gsub("[.]","-",time),time=as.yearmon(time))%>%filter(Reference.area %in% coulist$Reference.area)%>%left_join(coulist,by='Reference.area')%>%arrange(cou_cn,time)%>%select(time, i, cou_cn)%>%mutate(time=as.Date(time),year=year(time))%>%group_by(year,cou_cn)%>%summarise(i=mean(i))%>%group_by()data=gdp%>%filter(var=="GDP(单位:万亿美元)")%>%select(-var)%>%rename(gdp=value)%>%left_join(i5,by=c("cou_cn","year"))%>%gather(var,value,-cou_cn,-year,na.rm=T)ggplot(data,aes(year,value,color=cou_cn))+geom_line(size=1)+facet_wrap(~var,scales="free")+labs(title="",x="",y="")+guides(linetype=guide_legend(NULL))+geom_vline(xintercept=c(1978,1992,2001,2008),linetype=2, color="darkgrey")+scale_x_continuous(breaks = seq(1970,2018,6))+guides(color=guide_legend(title=NULL))+theme_bw()+theme(legend.position="bottom",legend.text=element_text(size=11),axis.text=element_text(size=9))
gdp=read_excel("gdp.xlsx",skip=2)country=data.frame(CountryID=c(156,392,276,826,840),cou_cn=c("1 中国","2 日本","3 德国","4 英国","5 美国"),stringsAsFactors = F)gdp=gdp%>%right_join(country,by="CountryID")%>%filter(IndicatorName %in% c("Gross Domestic Product (GDP)","Exports of goods and services","Imports of goods and services","Mining, Manufacturing, Utilities (ISIC C-E)"))%>%select(cou_cn,IndicatorName,'1970':'2018')%>%gather(year,value,-IndicatorName,-cou_cn)%>%spread(IndicatorName,value)%>%rename(GDP='Gross Domestic Product (GDP)',EX='Exports of goods and services',IM='Imports of goods and services',MAN='Mining, Manufacturing, Utilities (ISIC C-E)')%>%mutate(year=as.numeric(year),'GDP(单位:万亿美元)'=GDP/10^12,'净出口额(单位:亿美元)'=(EX-IM)/10^8,'采矿、制造和公用事业占GDP的比重'=MAN/GDP)%>%group_by(year)%>%mutate('GDP的全球份额'=GDP/sum(GDP))%>%group_by()%>%select(cou_cn,year,'GDP(单位:万亿美元)')%>%gather(var,value,-year,-cou_cn)%>%select(year, cou_cn, value)%>%rename(GDP='value')i=read.csv("WEBSTATS_CBPOL_M_DATAFLOW_csv_col.csv")coulist=data.frame(Reference.area=c("China","Japan","Euro area","United Kingdom","United States"),coun_cn=c("1 中国","2 日本","3 德国","4 英国", "5 美国"))i5=i%>%select(-FREQ, -Frequency, -Time.Period)%>%gather(time,i,-REF_AREA,-Reference.area, na.rm=T)%>%mutate(time=gsub("X","",time),time=gsub("[.]","-",time),time=as.yearmon(time))%>%filter(Reference.area %in% coulist$Reference.area)%>%left_join(coulist,by="Reference.area")%>%arrange(coun_cn,time)%>%mutate(year=floor(as.numeric(time)))%>%group_by(year, coun_cn)%>%summarise('i'=mean(i))%>%group_by()%>%select(year, coun_cn, i) %>%arrange(coun_cn,year) %>%rename(cou_cn='coun_cn')total = gdp %>% left_join(i5, by=c('cou_cn', 'year')) %>%select('year', 'cou_cn', 'i', 'GDP')%>%gather(var,value,-year,-cou_cn)ggplot(total,aes(year,value,color=cou_cn))+geom_line()+facet_wrap(~var)+theme_bw()
gdp=read_excel("gdp.xlsx",skip=2)country=data.frame(CountryID=c(156,392,276,826,840),coun_cn=c("1 中国","2 日本","3 欧元区","4 英国","5 美国"),stringsAsFactors = F)gdp=gdp%>%right_join(country,by="CountryID")%>%filter(IndicatorName %in% c("Gross Domestic Product (GDP)","Exports of goods and services","Imports of goods and services","Mining, Manufacturing, Utilities (ISIC C-E)"))%>%select(coun_cn,IndicatorName,'1970':'2018')%>%gather(year,value,-IndicatorName,-coun_cn)%>%spread(IndicatorName,value)%>%rename(GDP='Gross Domestic Product (GDP)',EX='Exports of goods and services',IM='Imports of goods and services',MAN='Mining, Manufacturing, Utilities (ISIC C-E)') %>%select(-EX,-IM,-MAN)coulist=data.frame(Reference.area=c("China","Japan","Euro area","United Kingdom","United States"),coun_cn=c("1 中国","2 日本","3 欧元区","4 英国","5 美国"))i <- read.csv("WEBSTATS_CBPOL_M_DATAFLOW_csv_col.csv", header=T, skip=0)i5 = i%>%select(-FREQ, -Frequency, -Time.Period)%>%gather(time,i,-REF_AREA,-Reference.area, na.rm=T)%>%mutate(time=gsub("X","",time),time=gsub("[.]","-",time),time=as.yearmon(time))%>%filter(Reference.area%in%coulist$Reference.area)%>%left_join(coulist,by="Reference.area")%>%arrange(coun_cn,time) %>%mutate(year=year(time),)%>%group_by(year,coun_cn)%>%mutate(i_year=i/sum(i))%>%ungroup()%>%select(coun_cn, year, i_year)gdp$year <- as.numeric(gdp$year)i5 <- i5%>%left_join(gdp,by=c("year","coun_cn"))ggplot(i5, aes(time,i_year,GDP))+geom_line()
gdp=read_excel("gdp.xlsx",skip=2)country=data.frame(CountryID=c(156,392,250,276,826,840),cou_cn=c("1 中国","3 日本","5 法国","4 德国","6 英国","2 美国"),stringsAsFactors = F)gdp1=gdp%>%right_join(country,by="CountryID")%>%filter(IndicatorName %in% c("Gross Domestic Product (GDP)","Exports of goods and services","Imports of goods and services","Mining, Manufacturing, Utilities (ISIC C-E)"))%>%select(cou_cn,IndicatorName,'1970':'2018')%>% #如果指标名里面数字开头或者gather(year,value,-IndicatorName,-cou_cn)%>%spread(IndicatorName,value)%>%rename(GDP='Gross Domestic Product (GDP)',EX='Exports of goods and services',IM='Imports of goods and services',MAN='Mining, Manufacturing, Utilities (ISIC C-E)')%>%mutate(year=as.numeric(year),'GDP(单位:万亿美元)'=GDP/10^12,'净出口额(单位:亿美元)'=(EX-IM)/10^8,'采矿、制造和公用事业占GDP的比重'=MAN/GDP)%>%group_by(year)%>%mutate('GDP的全球份额'=GDP/sum(GDP))%>%group_by()%>%select(cou_cn,year,'GDP(单位:万亿美元)','净出口额(单位:亿美元)','采矿、制造和公用事业占GDP的比重','GDP的全球份额')%>%gather(var,value,-year,-cou_cn)data=read.csv("WEBSTATS_CBPOL_M_DATAFLOW_csv_col.csv")coulist=data.frame(Reference.area=c("China","United States","Japan","Euro area","United Kingdom"),cou_cn=c("1 中国","2 美国","3 日本","4 德国","6 英国"))i=data %>%select(-FREQ,-Frequency,-Time.Period) %>%gather(time, i, -REF_AREA,-Reference.area, na.rm =T) %>%mutate(time=gsub("X","",time),time=gsub("[.]","-",time),time=as.yearmon(time))%>%filter(Reference.area %in% coulist$Reference.area)%>%left_join(coulist,by="Reference.area")%>%mutate(time=as.Date(time),year=year(time))%>%group_by(year,cou_cn)%>%summarise(i=mean(c(year,cou_cn)))%>%group_by()gdp2=gdp1%>%filter(var %in% "GDP(单位:万亿美元)")%>%spread(var,value)i_gdp=gdp2%>%left_join(i,by=c("cou_cn",'year'))%>%arrange(cou_cn,time)