# B Lists and Dataframes Code

# Video link: https://youtu.be/03xHdw8hkeE

################################################################################
#                   Lists and Dataframes
################################################################################
# Make a vector with c(). Note: c() builds an atomic vector, which is a
# different kind of object from an R list().
odd <- c(1, 3, 5, 7, 9)  # Numeric vector (stored as doubles; 1L, 3L, ... would be integers)
gender <- c("male", "female", "nonbinary", "prefer not to respond")  # Character vector (strings)

# Make the sequence 1 through 10 with the colon operator.
# (seq(1:10) happens to give the same answer, but only because seq() returns
# the positions of a vector it is handed: seq(5:10) is 1..6, not 5..10.)
numbers <- 1:10

# The same sequence using the seq() function with explicit start and end
numbers2 <- seq(1, 10)

# Count by 10s
numbers_v2 <- seq(10, 100, by = 10)
numbers_v2

# Count by 2s: the odd numbers from 1 to 197
odd_v2 <- seq(1, 197, by = 2)

# Pick a value out of a vector by position (the 14th number)
odd_v2[14]

# Save that same value as an object
val <- odd_v2[14]

# Copy the built-in `women` dataset into a data frame object called df
df <- women

# Peek at the top of the data (first 6 rows)
head(df)

# Peek at the bottom of the data (last 6 rows)
tail(df)

# Pull out the height column by name
df[["height"]]

# Pull out the first column by position
df[[1]]

# List the variable (column) names of the dataset
names(df)

# Print the value in row 4 of column 1 to the console
df[4, 1]

# Rename the column called 'height' to 'height(in)'
names(df)[names(df) == "height"] <- "height(in)"

# Check our work
head(df)

# Put the first column's name back to 'height'
names(df)[1] <- "height"

# Check our work
head(df)

# Count how often each height occurs in the women dataset (held in df)
table(df[["height"]])

# Copy the built-in `mtcars` dataset into an object called "cars"
cars <- mtcars

# List the variables it contains
names(cars)

# "cyl" is one of the variables in this dataset;
# count how many rows fall under each cyl value
table(cars[["cyl"]])

#Intro Stat code

# Video link: https://youtu.be/M0hGwiHdYmI

################################################################################
#                        INTRO STAT FUNCTIONS                                  #
################################################################################
# Assign the built-in `women` dataset to df
df <- women

# Mean of the height column
mean(df[["height"]])

# Median
median(df[["height"]])

# Minimum
min(df[["height"]])

# Maximum
max(df[["height"]])

# Standard deviation
sd(df[["height"]])

# Store the mean height in an object instead of printing it
m <- mean(df[["height"]])

# Per-column summary of the whole data frame
summary(df)

# Run this first!
# Statistical mode: the most frequently occurring value(s) in a vector.
#   x        a vector (numeric, character, ...)
#   returns  the value(s) of x with the highest count. Ties are all returned,
#            in order of first appearance. Empty input gives an empty result.
# NOTE: defining this at top level masks base R's mode() (which reports an
# object's storage mode) in your workspace; base::mode() still reaches it.
mode <- function(x) {
  u <- unique(x)
  # Guard: tabulate() always returns at least one bin, so without this an
  # empty x would index u with TRUE and produce a spurious NA.
  if (length(u) == 0L) {
    return(u)
  }
  # Count how many times each distinct value occurs, aligned with u
  tab <- tabulate(match(x, u), nbins = length(u))
  u[tab == max(tab)]
}

# With mode() defined, call it like any other function
mode(df[["height"]])

# Switch to a different dataset
cars <- mtcars

# Apply the mode function to the cyl column
mode(cars[["cyl"]])

# Attach the psych package (it must already be installed)
library(psych)

# Descriptive statistics for df via describe()
describe(df)