19.2 WRDS

Retrieve data from WRDS

# to set up connection from R to WRDS (https://wrds-www.wharton.upenn.edu/pages/support/programming-wrds/programming-r/r-from-your-computer/)
library(RPostgres)
library(dplyr)

# I've set up wrds connection before hand. Please use your username and password here.

# wrds <- dbConnect(Postgres(),
#                   host='wrds-pgdata.wharton.upenn.edu',
#                   port=9737,
#                   dbname='wrds',
#                   sslmode='require',
#                   user='')

# Check variables (column headers) in COMP ANNUAL FUNDAMENTAL
#uses the already-established wrds connection to prepare the SQL query string and save the query as the result res.
# check avaiable databases: https://wrds-web.wharton.upenn.edu/wrds/tools/variable.cfm?vendor_id=7
res <- dbSendQuery(wrds, "select column_name
                   from information_schema.columns
                   where table_schema='compa'
                   and table_name='funda'
                   order by column_name")
data <- dbFetch(res, n=-1) # fetches the data that results from running the query res against wrds and stores it as data
dbClearResult(res) # closes the connection
head(data)

# select everything
res <- dbSendQuery(wrds, "select * from compa.funda")

# from compa.funda

# only select the following variables
res <- dbSendQuery(wrds, "select gvkey, datadate, fyear, indfmt, consol, popsrc, datafmt, tic, cusip, conm, curcd, fyr, act, at, bkvlps, ceq, ch, che, dltt, dlc, emp, np, exchg, cik, costat, naicsh,mkvalt from compa.funda") #check variables from (https://wrds-web.wharton.upenn.edu/wrds/ds/comp/funda/index.cfm?navId=80)

# res <- dbSendQuery(wrds, "select gvkey, gp from compa.funda") #check variables from
data1 <- dbFetch(res, n=-1)
dbClearResult(res)

data = data1 %>%
    distinct(gvkey,datadate,fyear,tic,conm,.keep_all = T)