[关闭]
@agpwhy 2022-01-11T14:42:14.000000Z 字数 5823 阅读 245

王胖的生信笔记第三十三期:河流图

前段时间小老板看到一个这样的图

1

于是让我去看下这个图该怎么做。

首先不考虑直接用ggplot2从头去画(除非实在没有现成的包)。于是问了一圈,有人和我说这个叫河流图(Streamgraph,也叫Stream Chart)。实际就是堆叠柱状图(Stacked Bar Chart)的一个变种。

2

这样一看就明白了是啥意思了。

这自己咋整呢?

这里简单展示一下从TidyTuesday看到的一个示范。

配置环境

  # Environment setup for the stream-chart example.
  # Original example this walkthrough is based on:
  # https://github.com/z3tt/TidyTuesday/blob/master/R/2020_27_ClaremontRunXMen.Rmd

  # Install ggstream from GitHub only when it is not already available, so
  # that re-running the script does not reinstall the package every time.
  if (!requireNamespace("ggstream", quietly = TRUE)) {
    remotes::install_github("davidsjoberg/ggstream")
  }

  library(tidyverse)
  library(fuzzyjoin)
  library(ggstream)
  library(colorspace)
  library(ggtext)
  library(ragg)
  library(cowplot)
  library(pdftools)

然后设置一下作图细节

  # Global plot theme: start from theme_minimal() and then override the
  # individual elements used by the stream chart.
  theme_set(theme_minimal(base_family = "Helvetica", base_size = 12))

  theme_update(
    # Centered bold title and a small centered grey caption.
    plot.title = element_text(
      size = 27,
      face = "bold",
      hjust = .5,
      margin = margin(10, 0, 30, 0)
    ),
    plot.caption = element_text(
      size = 9,
      color = "grey40",
      hjust = .5,
      margin = margin(20, 0, 5, 0)
    ),
    # No y-axis labels, no axis titles, no grid.
    axis.text.y = element_blank(),
    axis.title = element_blank(),
    plot.background = element_rect(fill = "grey88", color = NA),
    panel.background = element_rect(fill = NA, color = NA),
    panel.grid = element_blank(),
    # Facets are stacked without gaps and without strip labels.
    panel.spacing.y = unit(0, "lines"),
    strip.text.y = element_blank(),
    # Flat, wide legend keys in a framed box below the plot.
    legend.position = "bottom",
    legend.text = element_text(size = 9, color = "grey40"),
    legend.box.margin = margin(t = 30),
    legend.background = element_rect(
      color = "grey40",
      size = .3,
      fill = "grey95"
    ),
    legend.key.height = unit(.25, "lines"),
    legend.key.width = unit(2.5, "lines"),
    # margin() takes top/right/bottom/left as four separate values; passing
    # rep(20, 4) as a single argument would put the whole vector into `t`.
    plot.margin = margin(20, 20, 20, 20)
  )

这里因为我电脑字体库的问题,所以用的是最简单的字体。

然后读取一下数据,修整清洗一下。

  # Per-issue character appearance counts from the TidyTuesday 2020-06-30
  # data set, read directly from GitHub.
  df_char_vis <- readr::read_csv(
    file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-30/character_visualization.csv"
  )

  # Lookup table of ten character names with ranks 1 to 10, in the order
  # listed below.
  df_best_chars <- tibble(
    rank = seq_len(10),
    char_popular = c(
      "Wolverine",
      "Magneto",
      "Nightcrawler",
      "Gambit",
      "Storm",
      "Colossus",
      "Phoenix",
      "Professor X",
      "Iceman",
      "Rogue"
    )
  )

这是关于X-Men漫画系列的数据。里面有些名字大家熟悉,有些可能不太熟悉:Wolverine-金刚狼,Magneto-万磁王,Nightcrawler-夜行者,Gambit-金牌手,Storm-暴风女,Colossus-钢力士,Phoenix-凤凰,Professor X-X教授,Iceman-冰人,Rogue-罗刹女。

  # Regex-join the appearance data against the ranked character names, sum
  # the numeric count columns per character / costume / issue, and keep only
  # the five top-ranked characters and issues before #281.
  df_best_stream <-
    df_char_vis %>%
    regex_inner_join(df_best_chars, by = c(character = "char_popular")) %>%
    group_by(character, char_popular, costume, rank, issue) %>%
    summarize(
      # across() skips the grouping columns, so only the count columns are
      # summed (replaces the superseded summarize_if()).
      across(where(is.numeric), ~ sum(.x, na.rm = TRUE)),
      .groups = "drop"
    ) %>%
    filter(rank <= 5, issue < 281)

  # Padding rows: for every character / costume group, take four rows and
  # overwrite them with (near-)zero values placed 20 and 5 issues before the
  # first issue and 5 and 20 issues after the last issue.
  # Presumably this lets the streams taper off at both ends -- TODO confirm.
  # NOTE: assumes every group has at least four rows; otherwise the length-4
  # vectors assigned in mutate() do not match the group size.
  df_smooth <-
    df_best_stream %>%
    group_by(character, char_popular, costume, rank) %>%
    slice(1:4) %>%
    mutate(
      issue = c(
        min(df_best_stream$issue) - 20,
        min(df_best_stream$issue) - 5,
        max(df_best_stream$issue) + 5,
        max(df_best_stream$issue) + 20
      ),
      speech = c(0, .001, .001, 0),
      thought = c(0, .001, .001, 0),
      narrative = c(0, .001, .001, 0),
      depicted = c(0, .001, .001, 0)
    ) %>%
    ungroup()

  # Order of the four appearance types; used as factor levels below.
  levels <- c("depicted", "speech", "thought", "narrative")

  # Final long-format data: real rows plus padding rows, with a combined
  # "<character> (<costume>)" label ordered by rank and one row per
  # appearance type.
  df_best_stream_fct <-
    df_best_stream %>%
    bind_rows(df_smooth) %>%
    mutate(
      costume = if_else(costume == "Costume", "costumed", "casual"),
      # Every label except Storm's gets a trailing space.
      # Presumably a legend-spacing tweak -- TODO confirm.
      char_costume = if_else(
        char_popular == "Storm",
        glue::glue("{char_popular} ({costume})"),
        glue::glue("{char_popular} ({costume}) ")
      ),
      char_costume = fct_reorder(char_costume, rank)
    ) %>%
    pivot_longer(
      cols = speech:depicted,
      names_to = "parameter",
      values_to = "value"
    ) %>%
    mutate(parameter = factor(parameter, levels = levels))

准备作图

这里是准备配色

  # Colour palette: five base colours, each immediately followed by a
  # lightened variant of itself (ten colours in total).
  pal <- c(
    "#FFB400",
    lighten("#FFB400", .25, space = "HLS"),
    "#C20008",
    lighten("#C20008", .2, space = "HLS"),
    "#13AFEF",
    lighten("#13AFEF", .25, space = "HLS"),
    "#8E038E",
    lighten("#8E038E", .2, space = "HLS"),
    "#595A52",
    lighten("#595A52", .15, space = "HLS")
  )

  # One text label per facet (appearance type), all positioned at issue 78.
  labels <- tibble(
    issue = 78,
    value = c(-21, -19, -14, -11),
    parameter = factor(levels, levels = levels),
    label = c(
      "Depicted",
      "Speech\nBubbles",
      "Thought\nBubbles",
      "Narrative\nStatements"
    )
  )

  # Annotation texts, one per character, with the position (issue / value)
  # and the facet (parameter) each one belongs to. `parameter` and
  # `char_costume` are converted to factors that share their levels with
  # `levels` and df_best_stream_fct$char_costume respectively.
  texts <- mutate(
    tibble(
      issue = c(295, 80, 245, 127, 196),
      value = c(-35, 35, 30, 57, 55),
      parameter = c("depicted", "depicted", "thought", "speech", "speech"),
      text = c(
        '**Gambit** was introduced for the first time in issue #266 called "Gambit: Out of the Frying Pan"— nevertheless, he is the **4<sup>th</sup> most popular X-Men character**!',
        '**Wolverine is the most popular X-Men** and has a regular presence in the X-Men comics between 1975 and 1991.',
        '**Storm** is by far the most thoughtful of the five most popular X-Men characters, especially in issues #220, #223 and #265. Storm **ranks 5<sup>th</sup>**.',
        "**Magneto** was ranked by IGN as the *Greatest Comic Book Villain of All Time*. And even though he only appears from time to time he **ranks 2<sup>nd</sup>**—<br>4 ranks higher than his friend and opponent Professor X!",
        'The **3<sup>rd</sup> most popular X-men character Nightcrawler** gets injured during the "Mutant Massacre" and fell into a coma after an attack from Riptide in issue #211.'
      ),
      char_popular = c("Gambit", "Wolverine", "Storm", "Magneto", "Nightcrawler"),
      costume = "costumed",
      vjust = c(.5, .5, .4, .36, .38)
    ),
    parameter = factor(parameter, levels = levels),
    # Same label construction as in df_best_stream_fct: only the Storm label
    # has no trailing space.
    char_costume = if_else(
      char_popular == "Storm",
      glue::glue("{char_popular} ({costume})"),
      glue::glue("{char_popular} ({costume}) ")
    ),
    char_costume = factor(
      char_costume,
      levels = levels(df_best_stream_fct$char_costume)
    )
  )

最后就作图

  # Build the stream chart: one facet per appearance type (parameter), one
  # stream per character/costume combination.
  g <-
    df_best_stream_fct %>%
    ggplot(aes(
      issue, value,
      color = char_costume,
      fill = char_costume
    )) +
    # White contour layer drawn first, underneath the filled streams.
    geom_stream(
      geom = "contour",
      color = "white",
      size = 1.25,
      bw = .1
    ) +
    geom_hline(yintercept = 0, color = "grey88") +
    # Filled streams.
    geom_stream(
      geom = "polygon",
      #n_grid = 12000,
      bw = .1,
      size = 0
    ) +
    # Dotted vertical guide lines at selected issues.
    geom_vline(
      data = tibble(x = c(97, seq(125, 250, by = 25), 280)),
      aes(xintercept = x),
      inherit.aes = FALSE,
      color = "grey88",
      size = .5,
      linetype = "dotted"
    ) +
    # Background-coloured rectangles covering everything left of issue 78
    # and right of issue 299.
    annotate(
      "rect",
      xmin = -Inf, xmax = 78,
      ymin = -Inf, ymax = Inf,
      fill = "grey88"
    ) +
    annotate(
      "rect",
      xmin = 299, xmax = Inf,
      ymin = -Inf, ymax = Inf,
      fill = "grey88"
    ) +
    # Facet labels.
    geom_text(
      data = labels,
      aes(issue, value, label = label),
      inherit.aes = FALSE,
      family = "Helvetica",
      size = 4.7,
      color = "grey25",
      fontface = "bold",
      lineheight = .85,
      hjust = 0
    ) +
    facet_grid( ## needs facet_grid for space argument
      parameter ~ .,
      scales = "free_y",
      space = "free"
    ) +
    scale_x_continuous(
      limits = c(74, NA),
      # NOTE(review): the first break sits at 94 while its label and the
      # matching guide line use 97 -- possibly a deliberate label nudge;
      # confirm before changing.
      breaks = c(94, seq(125, 250, by = 25), 280),
      labels = glue::glue("Issue\n#{c(97, seq(125, 250, by = 25), 280)}"),
      position = "top"
    ) +
    scale_y_continuous(expand = c(.03, .03)) +
    scale_color_manual(
      expand = c(0, 0),
      values = pal,
      # guide = FALSE is deprecated in ggplot2 >= 3.3.4; "none" hides the
      # colour legend in all versions.
      guide = "none"
    ) +
    scale_fill_manual(
      values = pal,
      name = NULL
    ) +
    coord_cartesian(clip = "off") +
    labs(
      title = "Appearance of the Five Most Popular X-Men Characters in Chris Claremont's Comics",
      caption = "Visualization by Cédric Scherer • Data by Claremont Run Project via Malcom Barret • Popularity Scores by ranker.com • Logo by Comicraft"
    ) +
    # Centre all title elements.
    theme(title = element_text(vjust = .5, hjust = .5))

2020_27_Xmen_clean

通过这样的可视化,可以看到金刚狼从所有统计的时间段(1975-1991)都非常有人气;万磁王人气排名第二;夜行者也是人气角色;暴风女虽然人气不如其他几人,但是在漫画中的思想描述最多;金牌手一直到266册才首次登场,但马上就有了大量存在感。

3

添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注