第 7 章 科研合作

看看我们的科研小圈子

7.1 机构匹配

需要用到强大的正则表达式

mc <- "\\[.*?\\]\\s+([^,]*),"

complete_set %>% select(C1) %>% 
                 top_n(1) %>% 
                 str_view_all(mc)
  • Zhejiang Univ, Dept Food Sci & Nutr, Hangzhou 310029, Zhejiang, Peoples R China; Sichuan Agr Univ, Dept Food Sci, Yaan, Sichuan, Peoples R China; C&H Sugar Co, Crockett, CA 94525 USA

7.2 化学学科

我们就看看四川省的化学学科吧

Organ <- complete_set %>% 
         filter(Category_ESI == "CHEMISTRY") %>% 
         select(UT, C1) %>%
         mutate(a = str_extract_all(C1, mc) ) %>%
         unnest() %>% 
         #tidyr::extract(a, c("namelist", "organization"), "(\\[.*?\\])\\s+([^,]*),")
         tidyr::extract(a, "Organization", "\\[.*?\\]\\s+([^,]*),")

head(Organ)
ABCDEFGHIJ0123456789
UT
<chr>
WOS:000388900200004
WOS:000388900200004
WOS:000388900200004
WOS:000388900200004
WOS:000387416500013
WOS:000387416500013
library(widyr)
paper_org_pairs <- Organ %>% 
       pairwise_count(Organization, UT, sort = TRUE)
head(paper_org_pairs)
ABCDEFGHIJ0123456789
item1
<chr>
item2
<chr>
n
<dbl>
China Acad Engn PhysSouthwest Univ Sci & Technol106
Southwest Univ Sci & TechnolChina Acad Engn Phys106
Sichuan Normal UnivSichuan Univ70
Xihua UnivSichuan Univ70
Sichuan UnivSichuan Normal Univ70
Sichuan UnivXihua Univ70
links <- paper_org_pairs %>% select(item1, item2, n) %>%
                             filter(n >= 50) %>%
                             rename(from = item1, to = item2, weight = n)
links
ABCDEFGHIJ0123456789
from
<chr>
to
<chr>
weight
<dbl>
China Acad Engn PhysSouthwest Univ Sci & Technol106
Southwest Univ Sci & TechnolChina Acad Engn Phys106
Sichuan Normal UnivSichuan Univ70
Xihua UnivSichuan Univ70
Sichuan UnivSichuan Normal Univ70
Sichuan UnivXihua Univ70
Univ GeorgiaXihua Univ70
Xihua UnivUniv Georgia70
Sichuan UnivChengdu Univ Technol65
Chengdu Univ TechnolSichuan Univ65
library(igraph)
library(ggraph)

links %>%
  graph_from_data_frame(directed = F) %>%
  ggraph(layout = 'kk') + 
  geom_edge_fan(aes(edge_alpha = weight, edge_width = weight)) +
  geom_node_point(size = 1) +
  geom_node_text(aes(label = name), repel = TRUE, 
                 point.padding = unit(0.2, "lines")) 

  #theme_void() +
  #facet_wrap(~ type)  

7.3 全部学科

不过瘾吗? 那我们就看看全局

Organize <- complete_set %>% 
         select(Category_ESI_cn, UT, C1) %>%
         mutate(a = str_extract_all(C1, mc) ) %>%
         unnest() %>% 
         #tidyr::extract(a, c("namelist", "organization"), "(\\[.*?\\])\\s+([^,]*),")
         tidyr::extract(a, "Organization", "\\[.*?\\]\\s+([^,]*),")

head(Organize)
ABCDEFGHIJ0123456789
Category_ESI_cn
<chr>
UT
<chr>
工程学WOS:000389786500012
工程学WOS:000389786500012
工程学WOS:000389786500012
地球科学WOS:000390181000002
地球科学WOS:000390181000002
地球科学WOS:000390181000002
library(widyr)
paper_organ_pairs <- Organize %>% 
       group_by(Category_ESI_cn) %>%
       pairwise_count(Organization, UT, sort = TRUE, upper = FALSE)
head(paper_organ_pairs)
ABCDEFGHIJ0123456789
Category_ESI_cn
<chr>
item1
<chr>
item2
<chr>
n
<dbl>
工程学Sichuan UnivChengdu Univ Informat Technol31
工程学Univ Elect Sci & Technol ChinaChengdu Univ Informat Technol27
工程学Southwest Petr UnivChina Univ Petr23
工程学Sichuan UnivXihua Univ22
工程学Univ Elect Sci & Technol ChinaSichuan Normal Univ19
工程学Southwest Univ Sci & TechnolChina Acad Engn Phys18
linkss <- paper_organ_pairs %>% 
         select(item1, item2, n, Category_ESI_cn) %>%
         filter(n >= 10) %>% 
         filter(Category_ESI_cn %in% c("物理学", "化学", "数学", "工程学", "计算机科学", "材料科学")) %>%
         rename(from = item1, to = item2, weight = n)
linkss
ABCDEFGHIJ0123456789
from
<chr>
to
<chr>
weight
<dbl>
Category_ESI_cn
<chr>
Sichuan UnivChengdu Univ Informat Technol31工程学
Univ Elect Sci & Technol ChinaChengdu Univ Informat Technol27工程学
Southwest Petr UnivChina Univ Petr23工程学
Sichuan UnivXihua Univ22工程学
Univ Elect Sci & Technol ChinaSichuan Normal Univ19工程学
Southwest Univ Sci & TechnolChina Acad Engn Phys18工程学
Southwest Petr UnivSichuan Univ16工程学
Univ Elect Sci & Technol ChinaXihua Univ15工程学
Sichuan UnivSichuan Agr Univ14工程学
Southwest Petr UnivUniv Elect Sci & Technol China13工程学
library(igraph)
library(ggraph)

linkss %>%
  graph_from_data_frame(directed = F) %>%
  ggraph(layout = 'kk') + 
  geom_edge_fan(aes(edge_alpha = weight, 
                    edge_width = weight,
                    edge_colour = Category_ESI_cn)) +
  geom_node_point(size = 1) +
  geom_node_text(aes(label = name), repel = TRUE, 
                 point.padding = unit(0.2, "lines")) +
  theme_void() +
  facet_wrap(~ Category_ESI_cn)  

7.4 看看某个学校的呢

蓝皮书的网络图

sicnu_organize <- sicnu_set %>% 
         select(Category_ESI_cn, UT, C1) %>%
         mutate(a = str_extract_all(C1, mc) ) %>%
         unnest() %>% 
         #tidyr::extract(a, c("namelist", "organization"), "(\\[.*?\\])\\s+([^,]*),")
         tidyr::extract(a, "Organization", "\\[.*?\\]\\s+([^,]*),")

head(sicnu_organize)
ABCDEFGHIJ0123456789
Category_ESI_cn
<chr>
UT
<chr>
物理学WOS:000391685500028
物理学WOS:000391685500028
物理学WOS:000391685500028
化学WOS:000390294600057
化学WOS:000390294600057
综合交叉学科WOS:000391217400023
sicnu_organ_pairs <- sicnu_organize %>% 
       filter(!Organization %in% c("Sichuan Normal Univ")) %>% 
       count(Category_ESI_cn, Organization) %>%
       arrange(desc(n))

head(sicnu_organ_pairs)
ABCDEFGHIJ0123456789
Category_ESI_cn
<chr>
Organization
<chr>
n
<int>
化学Sichuan Univ78
物理学Sichuan Univ77
物理学Univ Elect Sci & Technol China48
化学Chinese Acad Sci42
物理学Chinese Acad Sci36
材料科学Hong Kong Polytech Univ34
library(tidygraph)
library(ggraph)

  sicnu_organ_pairs  %>% 
        rename(from = Category_ESI_cn, to = Organization, weight = n) %>% 
        filter( weight > 10 ) %>%
        as_tbl_graph(directed = F) %>%
        ggraph(layout = 'graphopt') + 
        geom_edge_link(aes(edge_alpha = weight, 
                           edge_width = weight)) +
        geom_node_point(size = 1) +
        geom_node_text(aes(label = name), repel = TRUE, 
                       point.padding = unit(0.2, "lines")) 

df_pairs <- sicnu_organ_pairs  %>% 
           filter( !is.na(Category_ESI_cn) ) %>% 
           rename(from = Category_ESI_cn, to = Organization, weight = n) %>% 
           filter( weight > 4 ) 
library(googleVis)
draftsankey <- gvisSankey(df_pairs, from = "from", to = "to", weight = "weight",
              options=list(height=1000, width=600,
              sankey="{
               link:{color:{fill: 'lightgray', fillOpacity: 0.7}},
               node:{nodePadding: 5, label:{fontSize: 12}, interactivity: true, width: 20},
              }")
  )

plot(draftsankey)

7.5 思考1

把发文量加进来,学院-学科-合作,这个有意思?

7.6 思考2

类似(NBA2015)4,学校类型- 学校 - 学科贡献。这个有意思。可能要调整数据集