[关闭]
@fanxy 2020-03-25T12:19:10.000000Z 字数 8813 阅读 641

课堂练习1:数据处理与可视化

樊潇彦 复旦大学经济学院 金融数据


0. 准备工作

下载数据:Ex1.rar

  1. setwd("D:\\...\\课堂练习1")
  2. rm(list=ls())
  3. library(tidyverse)
  4. library(readxl) # 读取excel数据
  5. library(stringr) # 字符串处理
  6. library(ggplot2)
  7. library(zoo)

1. CAMP模型数据整理

CAMP模型设定为:

在回归检验之前,我们要将 camp.xlsx 中的数据合并为一张数据表,其中:

  1. rf=read_excel('camp.xlsx',sheet="无风险利率")
  2. str(rf)
  3. rf=rf%>%
  4. mutate(Trdmnt=as.yearmon(Clsdt))%>%
  5. group_by(Trdmnt)%>%summarise(rf=mean(Nrrmtdt))%>%group_by() # 月度均值
  6. rm=read_excel("camp.xlsx",sheet="市场收益率")
  7. str(rm)
  8. rm_rf=rm%>%
  9. mutate(Trdmnt=as.yearmon(Trdmnt))%>%
  10. left_join(rf,by="Trdmnt")%>% # 合并数据
  11. mutate(id=paste(Trdmnt,Markettype,sep="-")) # 标识每个市场每个月的回报率
  12. r=read_excel("camp.xlsx",sheet="个股收益率")
  13. str(r)
  14. capm=r%>%
  15. mutate(Trdmnt=as.yearmon(Trdmnt),
  16. id=paste(Trdmnt,Markettype,sep="-"))%>%
  17. select(-Trdmnt,-Markettype)%>%
  18. left_join(rm_rf,by="id")%>% # 个股所在市场该月的回报率
  19. mutate(r_ex=Mretwd-rf, # 计算个股超额回报
  20. rm_exeq=Mretwdeq-rf, # 分别计算三种市场超额回报
  21. rm_exos=Mretwdos-rf,
  22. rm_extl=Mretwdtl-rf)%>%
  23. select(Trdmnt,Stkcd,Markettype,contains("_ex"))
  24. str(capm)
  25. ggplot(capm%>%mutate(Trdmnt=as.Date(Trdmnt)),
  26. aes(Trdmnt,r_ex,color="r_ex"))+
  27. geom_line()+
  28. geom_line(aes(Trdmnt,rm_exos,color="rm_exos"))+
  29. facet_wrap(~Stkcd)+
  30. theme_bw()

2. 各国GDP分析

打开联合国数据 gdp.xlsx,包括以下指标:

我们要分析中国与日、法、德、英、美相比,在过去48年中的GDP表现,需要作图的指标包括:
1. GDP总量(单位:万亿美元);
2. 净出口额(单位:亿美元);
3. 广义第二产业(采矿、制造和公用事业)占GDP的比重;
4. GDP占全球的份额。

  1. gdp=read_excel("gdp.xlsx",skip=2)
  2. country=data.frame(CountryID=c(156,392,250,276,826,840),
  3. cou_cn=c("1 中国","3 日本","5 法国","4 德国","6 英国","2 美国"),
  4. stringsAsFactors = F)
  5. gdp=gdp%>%
  6. right_join(country,by="CountryID")%>%
  7. filter(IndicatorName %in% c("Gross Domestic Product (GDP)",
  8. "Exports of goods and services",
  9. "Imports of goods and services",
  10. "Mining, Manufacturing, Utilities (ISIC C-E)"))%>%
  11. select(cou_cn,IndicatorName,'1970':'2018')%>%
  12. gather(year,value,-IndicatorName,-cou_cn)%>%
  13. spread(IndicatorName,value)%>%
  14. rename(GDP='Gross Domestic Product (GDP)',
  15. EX='Exports of goods and services',
  16. IM='Imports of goods and services',
  17. MAN='Mining, Manufacturing, Utilities (ISIC C-E)')%>%
  18. mutate(year=as.numeric(year),
  19. 'GDP(单位:万亿美元)'=GDP/10^12,
  20. '净出口额(单位:亿美元)'=(EX-IM)/10^8,
  21. '采矿、制造和公用事业占GDP的比重'=MAN/GDP)%>%
  22. group_by(year)%>%mutate('GDP的全球份额'=GDP/sum(GDP))%>%group_by()%>%
  23. select(cou_cn,year,'GDP(单位:万亿美元)','净出口额(单位:亿美元)','采矿、制造和公用事业占GDP的比重','GDP的全球份额')%>%
  24. gather(var,value,-year,-cou_cn)
  25. # 作图
  26. ggplot(gdp,aes(year,value,color=cou_cn))+
  27. geom_line(size=1)+
  28. facet_wrap(~var,scales="free")+
  29. labs(title="",x="",y="")+
  30. guides(linetype=guide_legend(NULL))+
  31. geom_vline(xintercept=c(1978,1992,2001,2008),linetype=2, color="darkgrey")+
  32. scale_x_continuous(breaks = seq(1970,2018,6))+
  33. guides(color=guide_legend(title=NULL))+theme_bw()+
  34. theme(legend.position="bottom",
  35. legend.text=element_text(size=11),
  36. axis.text=element_text(size=9))

3. 各国GDP与基准利率

  1. i=read.csv("WEBSTATS_CBPOL_M_DATAFLOW_csv_col.csv",stringsAsFactors = F)
  2. coulist=data.frame(Reference.area=c("China","Japan","Euro area","United Kingdom","United States"),
  3. cou_cn=c("1 中国","2 日本","3 德国","4 英国", "5 美国"))
  4. library(lubridate)
  5. i5=i%>%select(-FREQ, -Frequency, -Time.Period)%>%
  6. gather(time,i,-REF_AREA,-Reference.area, na.rm=T)%>%
  7. mutate(time=gsub("X","",time),
  8. time=gsub("[.]","-",time),
  9. time=as.yearmon(time))%>%
  10. filter(Reference.area %in% coulist$Reference.area)%>%
  11. left_join(coulist,by='Reference.area')%>%
  12. arrange(cou_cn,time)%>%
  13. select(time, i, cou_cn)%>%
  14. mutate(time=as.Date(time),year=year(time))%>%
  15. group_by(year,cou_cn)%>%summarise(i=mean(i))%>%group_by()
  16. data=gdp%>%
  17. filter(var=="GDP(单位:万亿美元)")%>%select(-var)%>%
  18. rename(gdp=value)%>%left_join(i5,by=c("cou_cn","year"))%>%
  19. gather(var,value,-cou_cn,-year,na.rm=T)
  20. ggplot(data,aes(year,value,color=cou_cn))+
  21. geom_line(size=1)+
  22. facet_wrap(~var,scales="free")+
  23. labs(title="",x="",y="")+
  24. guides(linetype=guide_legend(NULL))+
  25. geom_vline(xintercept=c(1978,1992,2001,2008),linetype=2, color="darkgrey")+
  26. scale_x_continuous(breaks = seq(1970,2018,6))+
  27. guides(color=guide_legend(title=NULL))+theme_bw()+
  28. theme(legend.position="bottom",
  29. legend.text=element_text(size=11),
  30. axis.text=element_text(size=9))

夏铭松

  1. gdp=read_excel("gdp.xlsx",skip=2)
  2. country=data.frame(CountryID=c(156,392,276,826,840),
  3. cou_cn=c("1 中国","2 日本","3 德国","4 英国","5 美国"),
  4. stringsAsFactors = F)
  5. gdp=gdp%>%
  6. right_join(country,by="CountryID")%>%
  7. filter(IndicatorName %in% c("Gross Domestic Product (GDP)",
  8. "Exports of goods and services",
  9. "Imports of goods and services",
  10. "Mining, Manufacturing, Utilities (ISIC C-E)"))%>%
  11. select(cou_cn,IndicatorName,'1970':'2018')%>%
  12. gather(year,value,-IndicatorName,-cou_cn)%>%
  13. spread(IndicatorName,value)%>%
  14. rename(GDP='Gross Domestic Product (GDP)',
  15. EX='Exports of goods and services',
  16. IM='Imports of goods and services',
  17. MAN='Mining, Manufacturing, Utilities (ISIC C-E)')%>%
  18. mutate(year=as.numeric(year),
  19. 'GDP(单位:万亿美元)'=GDP/10^12,
  20. '净出口额(单位:亿美元)'=(EX-IM)/10^8,
  21. '采矿、制造和公用事业占GDP的比重'=MAN/GDP)%>%
  22. group_by(year)%>%mutate('GDP的全球份额'=GDP/sum(GDP))%>%group_by()%>%
  23. select(cou_cn,year,'GDP(单位:万亿美元)')%>%
  24. gather(var,value,-year,-cou_cn)%>%
  25. select(year, cou_cn, value)%>%
  26. rename(GDP='value')
  27. i=read.csv("WEBSTATS_CBPOL_M_DATAFLOW_csv_col.csv")
  28. coulist=data.frame(Reference.area=c("China","Japan","Euro area","United Kingdom","United States"),
  29. coun_cn=c("1 中国","2 日本","3 德国","4 英国", "5 美国"))
  30. i5=i%>%select(-FREQ, -Frequency, -Time.Period)%>%
  31. gather(time,i,-REF_AREA,-Reference.area, na.rm=T)%>%
  32. mutate(time=gsub("X","",time),
  33. time=gsub("[.]","-",time),
  34. time=as.yearmon(time))%>%
  35. filter(Reference.area %in% coulist$Reference.area)%>%
  36. left_join(coulist,by="Reference.area")%>%
  37. arrange(coun_cn,time)%>%
  38. mutate(year=floor(as.numeric(time)))%>%
  39. group_by(year, coun_cn)%>%summarise('i'=mean(i))%>%group_by()%>%
  40. select(year, coun_cn, i) %>%
  41. arrange(coun_cn,year) %>%
  42. rename(cou_cn='coun_cn')
  43. total = gdp %>% left_join(i5, by=c('cou_cn', 'year')) %>%
  44. select('year', 'cou_cn', 'i', 'GDP')%>%
  45. gather(var,value,-year,-cou_cn)
  46. ggplot(total,
  47. aes(year,value,color=cou_cn))+
  48. geom_line()+
  49. facet_wrap(~var)+
  50. theme_bw()

苏良

  1. gdp=read_excel("gdp.xlsx",skip=2)
  2. country=data.frame(CountryID=c(156,392,276,826,840),
  3. coun_cn=c("1 中国","2 日本","3 欧元区","4 英国","5 美国"),
  4. stringsAsFactors = F)
  5. gdp=gdp%>%
  6. right_join(country,by="CountryID")%>%
  7. filter(IndicatorName %in% c("Gross Domestic Product (GDP)",
  8. "Exports of goods and services",
  9. "Imports of goods and services",
  10. "Mining, Manufacturing, Utilities (ISIC C-E)"))%>%
  11. select(coun_cn,IndicatorName,'1970':'2018')%>%
  12. gather(year,value,-IndicatorName,-coun_cn)%>%
  13. spread(IndicatorName,value)%>%
  14. rename(GDP='Gross Domestic Product (GDP)',
  15. EX='Exports of goods and services',
  16. IM='Imports of goods and services',
  17. MAN='Mining, Manufacturing, Utilities (ISIC C-E)') %>%
  18. select(-EX,-IM,-MAN)
  19. coulist=data.frame(Reference.area=c("China","Japan","Euro area","United Kingdom","United States"),
  20. coun_cn=c("1 中国","2 日本","3 欧元区","4 英国","5 美国"))
  21. i <- read.csv("WEBSTATS_CBPOL_M_DATAFLOW_csv_col.csv", header=T, skip=0)
  22. i5 = i%>%select(-FREQ, -Frequency, -Time.Period)%>%
  23. gather(time,i,-REF_AREA,-Reference.area, na.rm=T)%>%
  24. mutate(time=gsub("X","",time),
  25. time=gsub("[.]","-",time),
  26. time=as.yearmon(time))%>%
  27. filter(Reference.area%in%coulist$Reference.area)%>%
  28. left_join(coulist,by="Reference.area")%>%
  29. arrange(coun_cn,time) %>%
  30. mutate(year=year(time),)%>%
  31. group_by(year,coun_cn)%>%mutate(i_year=i/sum(i))%>%ungroup()%>%
  32. select(coun_cn, year, i_year)
  33. gdp$year <- as.numeric(gdp$year)
  34. i5 <- i5%>%left_join(gdp,by=c("year","coun_cn"))
  35. ggplot(i5, aes(time,i_year,GDP))+geom_line()

李柯

  1. gdp=read_excel("gdp.xlsx",skip=2)
  2. country=data.frame(CountryID=c(156,392,250,276,826,840),
  3. cou_cn=c("1 中国","3 日本","5 法国","4 德国","6 英国","2 美国"),
  4. stringsAsFactors = F)
  5. gdp1=gdp%>%
  6. right_join(country,by="CountryID")%>%
  7. filter(IndicatorName %in% c("Gross Domestic Product (GDP)",
  8. "Exports of goods and services",
  9. "Imports of goods and services",
  10. "Mining, Manufacturing, Utilities (ISIC C-E)"))%>%
  11. select(cou_cn,IndicatorName,'1970':'2018')%>% #如果指标名里面数字开头或者
  12. gather(year,value,-IndicatorName,-cou_cn)%>%
  13. spread(IndicatorName,value)%>%
  14. rename(GDP='Gross Domestic Product (GDP)',
  15. EX='Exports of goods and services',
  16. IM='Imports of goods and services',
  17. MAN='Mining, Manufacturing, Utilities (ISIC C-E)')%>%
  18. mutate(year=as.numeric(year),
  19. 'GDP(单位:万亿美元)'=GDP/10^12,
  20. '净出口额(单位:亿美元)'=(EX-IM)/10^8,
  21. '采矿、制造和公用事业占GDP的比重'=MAN/GDP)%>%
  22. group_by(year)%>%mutate('GDP的全球份额'=GDP/sum(GDP))%>%group_by()%>%
  23. select(cou_cn,year,'GDP(单位:万亿美元)','净出口额(单位:亿美元)','采矿、制造和公用事业占GDP的比重','GDP的全球份额')%>%
  24. gather(var,value,-year,-cou_cn)
  25. data=read.csv("WEBSTATS_CBPOL_M_DATAFLOW_csv_col.csv")
  26. coulist=data.frame(Reference.area=c("China","United States","Japan","Euro area","United Kingdom"),
  27. cou_cn=c("1 中国","2 美国","3 日本","4 德国","6 英国"))
  28. i=data %>%
  29. select(-FREQ,-Frequency,-Time.Period) %>%
  30. gather(time, i, -REF_AREA,-Reference.area, na.rm =T) %>%
  31. mutate(time=gsub("X","",time),time=gsub("[.]","-",time),time=as.yearmon(time))%>%
  32. filter(Reference.area %in% coulist$Reference.area)%>%
  33. left_join(coulist,by="Reference.area")%>%
  34. mutate(time=as.Date(time),year=year(time))%>%
  35. group_by(year,cou_cn)%>%summarise(i=mean(c(year,cou_cn)))%>%group_by()
  36. gdp2=gdp1%>%
  37. filter(var %in% "GDP(单位:万亿美元)")%>%
  38. spread(var,value)
  39. i_gdp=gdp2%>%
  40. left_join(i,by=c("cou_cn",'year'))%>%
  41. arrange(cou_cn,time)
添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注