第 7 章 科研合作

看看我们的科研小圈子

7.1 机构匹配

需要用到强大的正则表达式

mc <- "\\[.*?\\]\\s+([^,]*),"

complete_set %>% select(C1) %>% 
                 top_n(1) %>% 
                 str_view_all(mc)

7.2 化学学科

我们就看看四川省的化学学科吧

Organ <- complete_set %>% 
         filter(Category_ESI == "CHEMISTRY") %>% 
         select(UT, C1) %>%
         mutate(a = str_extract_all(C1, mc) ) %>%
         unnest() %>% 
         #tidyr::extract(a, c("namelist", "organization"), "(\\[.*?\\])\\s+([^,]*),")
         tidyr::extract(a, "Organization", "\\[.*?\\]\\s+([^,]*),")

head(Organ)
library(widyr)
paper_org_pairs <- Organ %>% 
       pairwise_count(Organization, UT, sort = TRUE)
head(paper_org_pairs)
links <- paper_org_pairs %>% select(item1, item2, n) %>%
                             filter(n >= 50) %>%
                             rename(from = item1, to = item2, weight = n)
links
library(igraph)
library(ggraph)

links %>%
  graph_from_data_frame(directed = F) %>%
  ggraph(layout = 'kk') + 
  geom_edge_fan(aes(edge_alpha = weight, edge_width = weight)) +
  geom_node_point(size = 1) +
  geom_node_text(aes(label = name), repel = TRUE, 
                 point.padding = unit(0.2, "lines")) 

  #theme_void() +
  #facet_wrap(~ type)  

7.3 全部学科

不过瘾吗? 那我们就看看全局

Organize <- complete_set %>% 
         select(Category_ESI_cn, UT, C1) %>%
         mutate(a = str_extract_all(C1, mc) ) %>%
         unnest() %>% 
         #tidyr::extract(a, c("namelist", "organization"), "(\\[.*?\\])\\s+([^,]*),")
         tidyr::extract(a, "Organization", "\\[.*?\\]\\s+([^,]*),")

head(Organize)
library(widyr)
paper_organ_pairs <- Organize %>% 
       group_by(Category_ESI_cn) %>%
       pairwise_count(Organization, UT, sort = TRUE, upper = FALSE)
head(paper_organ_pairs)
linkss <- paper_organ_pairs %>% 
         select(item1, item2, n, Category_ESI_cn) %>%
         filter(n >= 10) %>% 
         filter(Category_ESI_cn %in% c("物理学", "化学", "数学", "工程学", "计算机科学", "材料科学")) %>%
         rename(from = item1, to = item2, weight = n)
linkss
library(igraph)
library(ggraph)

linkss %>%
  graph_from_data_frame(directed = F) %>%
  ggraph(layout = 'kk') + 
  geom_edge_fan(aes(edge_alpha = weight, 
                    edge_width = weight,
                    edge_colour = Category_ESI_cn)) +
  geom_node_point(size = 1) +
  geom_node_text(aes(label = name), repel = TRUE, 
                 point.padding = unit(0.2, "lines")) +
  theme_void() +
  facet_wrap(~ Category_ESI_cn)  

7.4 看看某个学校的呢

蓝皮书的网络图

sicnu_organize <- sicnu_set %>% 
         select(Category_ESI_cn, UT, C1) %>%
         mutate(a = str_extract_all(C1, mc) ) %>%
         unnest() %>% 
         #tidyr::extract(a, c("namelist", "organization"), "(\\[.*?\\])\\s+([^,]*),")
         tidyr::extract(a, "Organization", "\\[.*?\\]\\s+([^,]*),")

head(sicnu_organize)
sicnu_organ_pairs <- sicnu_organize %>% 
       filter(!Organization %in% c("Sichuan Normal Univ")) %>% 
       count(Category_ESI_cn, Organization) %>%
       arrange(desc(n))

head(sicnu_organ_pairs)
library(tidygraph)
library(ggraph)

  sicnu_organ_pairs  %>% 
        rename(from = Category_ESI_cn, to = Organization, weight = n) %>% 
        filter( weight > 10 ) %>%
        as_tbl_graph(directed = F) %>%
        ggraph(layout = 'graphopt') + 
        geom_edge_link(aes(edge_alpha = weight, 
                           edge_width = weight)) +
        geom_node_point(size = 1) +
        geom_node_text(aes(label = name), repel = TRUE, 
                       point.padding = unit(0.2, "lines")) 

df_pairs <- sicnu_organ_pairs  %>% 
           filter( !is.na(Category_ESI_cn) ) %>% 
           rename(from = Category_ESI_cn, to = Organization, weight = n) %>% 
           filter( weight > 4 ) 
library(googleVis)
draftsankey <- gvisSankey(df_pairs, from = "from", to = "to", weight = "weight",
              options=list(height=1000, width=600,
              sankey="{
               link:{color:{fill: 'lightgray', fillOpacity: 0.7}},
               node:{nodePadding: 5, label:{fontSize: 12}, interactivity: true, width: 20},
              }")
  )

plot(draftsankey)

7.5 思考1

把发文量加进来,学院-学科-合作,这个有意思?

7.6 思考2

类似(NBA2015)4,学校类型- 学校 - 学科贡献。这个有意思。可能要调整数据集