第 15 章 耦合

可以包含 这里主要参考了以下两篇文献:(1) Aria, M. & Cuccurullo, C. (2017). bibliometrix: An R-tool for comprehensive science mapping analysis, Journal of Informetrics, 11(4), pp 959-975, Elsevier, DOI: 10.1016/j.joi.2017.08.007 (link);(2) M.J. Cobo, A.G. López‐Herrera, E. Herrera‐Viedma, F. Herrera (2011). Science mapping software tools: Review, analysis, and cooperative study among tools, Journal of the American Society for Information Science and Technology, 62(7):1382–1402。

15.1 Bibliographic coupling

# Load Packages
library(tidyverse)
library(stringr)

Converting your ISI collection into a bibliographic data frame

# Import the tab-delimited ISI export. Quote handling is switched off
# (quote = "") so quote characters inside fields are read literally.
scient_df3 <- read_tsv(
  file = "data/dataset/savedrecs_utf.txt",
  quote = "",
  col_names = TRUE
)
# List the field tags that were read in as column names.
names(scient_df3)
##  [1] "PT" "AU" "BA" "BE" "GP" "AF" "BF" "CA" "TI" "SO"
## [11] "SE" "BS" "LA" "DT" "CT" "CY" "CL" "SP" "HO" "DE"
## [21] "ID" "AB" "C1" "RP" "EM" "RI" "OI" "FU" "FX" "CR"
## [31] "NR" "TC" "Z9" "U1" "U2" "PU" "PI" "PA" "SN" "EI"
## [41] "BN" "J9" "JI" "PD" "PY" "VL" "IS" "PN" "SU" "SI"
## [51] "MA" "BP" "EP" "AR" "DI" "D2" "EA" "EY" "PG" "WC"
## [61] "SC" "GA" "UT" "PM" "OA" "HC" "HP" "DA"

预处理非常必要

# Clean the raw records before any further processing:
#   1. collapse every run of whitespace to a single space (all columns become
#      character as a side effect of the stringr call),
#   2. rewrite the "WOS:" prefix of the record id (UT) to "ISI" and add the
#      constant DB / ER columns,
#   3. upper-case all text,
#   4. restore the publication year (PY) to numeric,
#   5. normalise the author field (AU): commas -> spaces, dots removed,
#      repeated spaces squeezed, outer whitespace trimmed.
# Uses across() instead of the deprecated funs() / superseded mutate_all(),
# mutate_if() and mutate_at(). The former mutate_all(as.character) step is
# dropped because every column is already character after step 1.
scient_df33 <- scient_df3 %>%
  as.data.frame() %>%
  mutate(across(everything(), ~ str_replace_all(.x, "\\s+", " "))) %>%
  mutate(
    UT = str_replace(UT, "WOS:", "ISI"),
    DB = "ISI",
    ER = ""
  ) %>%
  mutate(across(where(is.character), str_to_upper)) %>%
  mutate(PY = as.numeric(PY)) %>%
  mutate(
    # The named vector is applied in order, one pattern after the other.
    AU = AU %>%
      str_replace_all(c("," = " ", "\\." = "", "\\s+" = " ")) %>%
      str_trim()
  )

如果做文章与参考文献的耦合(bibliographic coupling),不应该对作者列(AU)进行 unnest()

# Build one row per (citing paper, cited reference) pair.
#
# AU is deliberately kept as the full author string: for article-to-reference
# coupling the citing paper (id) is the unit of analysis, so the author list
# must NOT be split and unnested here.
#
# Resulting columns: AU, CR, id, CRlist, length, fisrt, Year, SO, DOI.
# NOTE(review): `fisrt` looks like a misspelling of "first"; the name is kept
# unchanged in case later code refers to it.
tb <- scient_df33 %>%
  select(AU, CR) %>%
  mutate(
    id = row_number(),        # citing-paper id = row position in scient_df33
    CR = str_split(CR, "; ")  # cited references are separated by "; "
  ) %>%
  unnest(cols = CR) %>%
  mutate(
    # Split each reference into its comma-separated fields, dropping repeats.
    CRlist = map(str_split(CR, ","), unique),
    length = lengths(CRlist)
  ) %>%
  # Keep only references with more than three fields, so that positions
  # 1-3 extracted below are guaranteed to exist.
  filter(length > 3) %>%
  mutate(
    fisrt = str_trim(map_chr(CRlist, 1)),  # field 1 of the reference
    Year = map_chr(CRlist, 2),             # field 2; leading space is kept
    SO = str_trim(map_chr(CRlist, 3)),     # field 3 of the reference
    # "DOI <value>" up to the next comma, with the first "[" removed.
    DOI = str_replace(str_extract(CR, "DOI\\s+([^,]*)"), "\\[", "")
  ) %>%
  # References without a DOI cannot be matched reliably; drop them.
  filter(!is.na(DOI))
glimpse(tb)
## Rows: 15,611
## Columns: 9
## $ AU     <chr> "QIAN Z; ZHAO MM; HOU BP; ZHAO YH", "Q~
## $ CR     <chr> "AGARWAL GS, 2010, PHYS REV A, V81, DO~
## $ id     <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,~
## $ CRlist <list> <"AGARWAL GS", " 2010", " PHYS REV A"~
## $ length <int> 5, 6, 7, 5, 5, 5, 5, 5, 6, 7, 7, 5, 6,~
## $ fisrt  <chr> "AGARWAL GS", "ASPELMEYER M", "BAGHERI~
## $ Year   <chr> " 2010", " 2014", " 2011", " 2016", " ~
## $ SO     <chr> "PHYS REV A", "REV MOD PHYS", "NAT NAN~
## $ DOI    <chr> "DOI 10.1103/PHYSREVA.81.041803", "DOI~

稀疏矩阵太大,还是用tidy大法好

# Long ("triplet") form of the paper x reference incidence matrix:
# row = citing paper id, column = cited DOI, value = number of occurrences.
dummy_sparse <- tb %>%
  count(id, DOI, name = "value") %>%
  rename(row = id, column = DOI)
dummy_sparse

相当于矩阵转置

# Transpose in triplet form: swap the `row` and `column` labels while leaving
# the data and the physical column order untouched.
dummy_sparse_t <- dummy_sparse %>%
  select(column = row, row = column, value)
dummy_sparse_t