第 7 章 科研合作
看看我们的科研小圈子
7.1 机构匹配
需要用到强大的正则表达式
<- "\\[.*?\\]\\s+([^,]*),"
mc
%>% select(C1) %>%
complete_set top_n(1) %>%
str_view_all(mc)
7.2 化学学科
我们就看看四川省的化学学科吧
<- complete_set %>%
Organ filter(Category_ESI == "CHEMISTRY") %>%
select(UT, C1) %>%
mutate(a = str_extract_all(C1, mc) ) %>%
unnest() %>%
#tidyr::extract(a, c("namelist", "organization"), "(\\[.*?\\])\\s+([^,]*),")
::extract(a, "Organization", "\\[.*?\\]\\s+([^,]*),")
tidyr
head(Organ)
UT <chr> | |
---|---|
WOS:000388900200004 | |
WOS:000388900200004 | |
WOS:000388900200004 | |
WOS:000388900200004 | |
WOS:000387416500013 | |
WOS:000387416500013 |
library(widyr)
<- Organ %>%
paper_org_pairs pairwise_count(Organization, UT, sort = TRUE)
head(paper_org_pairs)
item1 <chr> | item2 <chr> | n <dbl> | ||
---|---|---|---|---|
China Acad Engn Phys | Southwest Univ Sci & Technol | 106 | ||
Southwest Univ Sci & Technol | China Acad Engn Phys | 106 | ||
Sichuan Normal Univ | Sichuan Univ | 70 | ||
Xihua Univ | Sichuan Univ | 70 | ||
Sichuan Univ | Sichuan Normal Univ | 70 | ||
Sichuan Univ | Xihua Univ | 70 |
<- paper_org_pairs %>% select(item1, item2, n) %>%
links filter(n >= 50) %>%
rename(from = item1, to = item2, weight = n)
links
from <chr> | to <chr> | weight <dbl> | ||
---|---|---|---|---|
China Acad Engn Phys | Southwest Univ Sci & Technol | 106 | ||
Southwest Univ Sci & Technol | China Acad Engn Phys | 106 | ||
Sichuan Normal Univ | Sichuan Univ | 70 | ||
Xihua Univ | Sichuan Univ | 70 | ||
Sichuan Univ | Sichuan Normal Univ | 70 | ||
Sichuan Univ | Xihua Univ | 70 | ||
Univ Georgia | Xihua Univ | 70 | ||
Xihua Univ | Univ Georgia | 70 | ||
Sichuan Univ | Chengdu Univ Technol | 65 | ||
Chengdu Univ Technol | Sichuan Univ | 65 |
library(igraph)
library(ggraph)
%>%
links graph_from_data_frame(directed = F) %>%
ggraph(layout = 'kk') +
geom_edge_fan(aes(edge_alpha = weight, edge_width = weight)) +
geom_node_point(size = 1) +
geom_node_text(aes(label = name), repel = TRUE,
point.padding = unit(0.2, "lines"))
#theme_void() +
#facet_wrap(~ type)
7.3 全部学科
不过瘾吗? 那我们就看看全局
<- complete_set %>%
Organize select(Category_ESI_cn, UT, C1) %>%
mutate(a = str_extract_all(C1, mc) ) %>%
unnest() %>%
#tidyr::extract(a, c("namelist", "organization"), "(\\[.*?\\])\\s+([^,]*),")
::extract(a, "Organization", "\\[.*?\\]\\s+([^,]*),")
tidyr
head(Organize)
Category_ESI_cn <chr> | UT <chr> | |
---|---|---|
工程学 | WOS:000389786500012 | |
工程学 | WOS:000389786500012 | |
工程学 | WOS:000389786500012 | |
地球科学 | WOS:000390181000002 | |
地球科学 | WOS:000390181000002 | |
地球科学 | WOS:000390181000002 |
library(widyr)
<- Organize %>%
paper_organ_pairs group_by(Category_ESI_cn) %>%
pairwise_count(Organization, UT, sort = TRUE, upper = FALSE)
head(paper_organ_pairs)
Category_ESI_cn <chr> | item1 <chr> | item2 <chr> | n <dbl> | |
---|---|---|---|---|
工程学 | Sichuan Univ | Chengdu Univ Informat Technol | 31 | |
工程学 | Univ Elect Sci & Technol China | Chengdu Univ Informat Technol | 27 | |
工程学 | Southwest Petr Univ | China Univ Petr | 23 | |
工程学 | Sichuan Univ | Xihua Univ | 22 | |
工程学 | Univ Elect Sci & Technol China | Sichuan Normal Univ | 19 | |
工程学 | Southwest Univ Sci & Technol | China Acad Engn Phys | 18 |
<- paper_organ_pairs %>%
linkss select(item1, item2, n, Category_ESI_cn) %>%
filter(n >= 10) %>%
filter(Category_ESI_cn %in% c("物理学", "化学", "数学", "工程学", "计算机科学", "材料科学")) %>%
rename(from = item1, to = item2, weight = n)
linkss
from <chr> | to <chr> | weight <dbl> | Category_ESI_cn <chr> | |
---|---|---|---|---|
Sichuan Univ | Chengdu Univ Informat Technol | 31 | 工程学 | |
Univ Elect Sci & Technol China | Chengdu Univ Informat Technol | 27 | 工程学 | |
Southwest Petr Univ | China Univ Petr | 23 | 工程学 | |
Sichuan Univ | Xihua Univ | 22 | 工程学 | |
Univ Elect Sci & Technol China | Sichuan Normal Univ | 19 | 工程学 | |
Southwest Univ Sci & Technol | China Acad Engn Phys | 18 | 工程学 | |
Southwest Petr Univ | Sichuan Univ | 16 | 工程学 | |
Univ Elect Sci & Technol China | Xihua Univ | 15 | 工程学 | |
Sichuan Univ | Sichuan Agr Univ | 14 | 工程学 | |
Southwest Petr Univ | Univ Elect Sci & Technol China | 13 | 工程学 |
library(igraph)
library(ggraph)
%>%
linkss graph_from_data_frame(directed = F) %>%
ggraph(layout = 'kk') +
geom_edge_fan(aes(edge_alpha = weight,
edge_width = weight,
edge_colour = Category_ESI_cn)) +
geom_node_point(size = 1) +
geom_node_text(aes(label = name), repel = TRUE,
point.padding = unit(0.2, "lines")) +
theme_void() +
facet_wrap(~ Category_ESI_cn)
7.4 看看某个学校的呢
蓝皮书的网络图
<- sicnu_set %>%
sicnu_organize select(Category_ESI_cn, UT, C1) %>%
mutate(a = str_extract_all(C1, mc) ) %>%
unnest() %>%
#tidyr::extract(a, c("namelist", "organization"), "(\\[.*?\\])\\s+([^,]*),")
::extract(a, "Organization", "\\[.*?\\]\\s+([^,]*),")
tidyr
head(sicnu_organize)
Category_ESI_cn <chr> | UT <chr> | |
---|---|---|
物理学 | WOS:000391685500028 | |
物理学 | WOS:000391685500028 | |
物理学 | WOS:000391685500028 | |
化学 | WOS:000390294600057 | |
化学 | WOS:000390294600057 | |
综合交叉学科 | WOS:000391217400023 |
<- sicnu_organize %>%
sicnu_organ_pairs filter(!Organization %in% c("Sichuan Normal Univ")) %>%
count(Category_ESI_cn, Organization) %>%
arrange(desc(n))
head(sicnu_organ_pairs)
Category_ESI_cn <chr> | Organization <chr> | n <int> | ||
---|---|---|---|---|
化学 | Sichuan Univ | 78 | ||
物理学 | Sichuan Univ | 77 | ||
物理学 | Univ Elect Sci & Technol China | 48 | ||
化学 | Chinese Acad Sci | 42 | ||
物理学 | Chinese Acad Sci | 36 | ||
材料科学 | Hong Kong Polytech Univ | 34 |
library(tidygraph)
library(ggraph)
%>%
sicnu_organ_pairs rename(from = Category_ESI_cn, to = Organization, weight = n) %>%
filter( weight > 10 ) %>%
as_tbl_graph(directed = F) %>%
ggraph(layout = 'graphopt') +
geom_edge_link(aes(edge_alpha = weight,
edge_width = weight)) +
geom_node_point(size = 1) +
geom_node_text(aes(label = name), repel = TRUE,
point.padding = unit(0.2, "lines"))
<- sicnu_organ_pairs %>%
df_pairs filter( !is.na(Category_ESI_cn) ) %>%
rename(from = Category_ESI_cn, to = Organization, weight = n) %>%
filter( weight > 4 )
library(googleVis)
<- gvisSankey(df_pairs, from = "from", to = "to", weight = "weight",
draftsankey options=list(height=1000, width=600,
sankey="{
link:{color:{fill: 'lightgray', fillOpacity: 0.7}},
node:{nodePadding: 5, label:{fontSize: 12}, interactivity: true, width: 20},
}")
)
plot(draftsankey)
7.6 思考2
类似(NBA2015)4,学校类型- 学校 - 学科贡献。这个有意思。可能要调整数据集