# B Lists and Dataframes Code
# Video link: https://youtu.be/03xHdw8hkeE
################################################################################
# Lists and Dataframes
################################################################################
# Make a vector (c() builds an atomic vector; a true R list comes from list())
# A numeric vector; plain literals such as 1 are stored as doubles, not integers
odd <- c(1, 3, 5, 7, 9)
# A character vector (strings)
gender <- c("male", "female", "nonbinary", "prefer not to respond")
# Make a sequence of numbers
# seq_len(10) gives the integers 1 to 10. Avoid seq(1:10): given a single
# vector, seq() just numbers its elements, so e.g. seq(5:10) returns 1..6
# rather than 5..10.
numbers <- seq_len(10)
# seq(from, to) counts from 1 up to 10 in steps of 1
numbers2 <- seq(1, 10)
# Count by 10s
numbers_v2 <- seq(10, 100, by = 10)
numbers_v2
# Count by 2s
# Not reading this out - perhaps for obvious reasons!
odd_v2 <- seq(1, 197, by = 2)
# Picking a value out of a vector (14th number)
odd_v2[14]
# Saving the same value as an object
val <- odd_v2[14]
# Copy the built-in `women` dataset into a data frame object called df
df <- women
# Preview the top of the dataset (6 rows)
head(df, n = 6L)
# Preview the bottom of the dataset (6 rows)
tail(df, n = 6L)
# Pull out the height column by name
df[["height"]]
# Pull out the first column by position
df[[1]]
# List the variable (column) names of the dataset
colnames(df)
# Print the value in row 4 of column 1 to the console
df[4, 1]
# Rename the first column (height) to 'height(in)'
colnames(df)[1] <- "height(in)"
# Check our work
head(df, n = 6L)
# Find the column called 'height(in)' and change it back to 'height'
colnames(df)[colnames(df) == "height(in)"] <- "height"
# Check our work
head(df, n = 6L)
# Frequency count of each height value in the women dataset (held in df)
table(df[["height"]])
# Copy the built-in `mtcars` dataset into an object called "cars"
cars <- mtcars
# List the variables available in the dataset
colnames(cars)
# One of the variables in this dataset is "cyl"
# Count how many rows fall under each value of cyl
table(cars[["cyl"]])
# Intro Stat code
# Video link: https://youtu.be/M0hGwiHdYmI
################################################################################
# INTRO STAT FUNCTIONS #
################################################################################
# Use the built-in `women` dataset as our data frame
df <- women
# Mean (average) height
mean(df[["height"]])
# Median (middle value) of height
median(df[["height"]])
# Smallest height
min(df[["height"]])
# Largest height
max(df[["height"]])
# Standard deviation of height
sd(df[["height"]])
# Save the mean height as an object so it can be reused
m <- mean(df[["height"]])
# Summary statistics for every column of the data frame
summary(df)
# Run this first!
# Statistical mode: returns the most frequently occurring value(s) of x.
# When several values tie for the highest count, all of them are returned,
# in order of first appearance in x.
# NOTE: this definition masks base R's mode() (which reports an object's
# storage mode) for code run from the global environment.
mode <- function(x) {
  distinct_values <- unique(x)
  # Map each element to the index of its distinct value, then count per index
  occurrences <- tabulate(match(x, distinct_values))
  is_most_frequent <- occurrences == max(occurrences)
  distinct_values[is_most_frequent]
}
# With mode() defined earlier in this script, call it like any other function
mode(df[["height"]])
# Switch to a different dataset
cars <- mtcars
# Find the most common number of cylinders with the same mode function
mode(cars[["cyl"]])
# Attach the psych package so its functions can be called directly
library(psych)
# Descriptive statistics for each column of df
describe(df)