Chapter 1 Prerequisites

This is a sample book written in Markdown. You can use anything that Pandoc’s Markdown supports, e.g., a math equation \(a^2 + b^2 = c^2\).

The bookdown package can be installed from CRAN or GitHub:

# Stable release from CRAN
install.packages(pkgs = "bookdown")
# Alternative: the development version from GitHub
# devtools::install_github("rstudio/bookdown")

Remember each Rmd file contains one and only one chapter, and a chapter is defined by the first-level heading #.

To compile this example to PDF, you need XeLaTeX. We recommend installing TinyTeX (which includes XeLaTeX): https://yihui.org/tinytex/.


---
title: "202AIE31 HW1"
output:
  word_document: default
  html_document: default
---

# Attach ggplot2, which supplies ggplot() and the geoms used for the plots below.
library(ggplot2)
# Auto-print the mpg data set to inspect its columns before plotting.
mpg
## # A tibble: 234 x 11
##    manufacturer model    displ  year   cyl trans   drv     cty   hwy fl    class
##    <chr>        <chr>    <dbl> <int> <int> <chr>   <chr> <int> <int> <chr> <chr>
##  1 audi         a4         1.8  1999     4 auto(l~ f        18    29 p     comp~
##  2 audi         a4         1.8  1999     4 manual~ f        21    29 p     comp~
##  3 audi         a4         2    2008     4 manual~ f        20    31 p     comp~
##  4 audi         a4         2    2008     4 auto(a~ f        21    30 p     comp~
##  5 audi         a4         2.8  1999     6 auto(l~ f        16    26 p     comp~
##  6 audi         a4         2.8  1999     6 manual~ f        18    26 p     comp~
##  7 audi         a4         3.1  2008     6 auto(a~ f        18    27 p     comp~
##  8 audi         a4 quat~   1.8  1999     4 manual~ 4        18    26 p     comp~
##  9 audi         a4 quat~   1.8  1999     4 auto(l~ 4        16    25 p     comp~
## 10 audi         a4 quat~   2    2008     4 manual~ 4        20    28 p     comp~
## # ... with 224 more rows
# Histogram of highway mileage (hwy), with bars filled by the drv column.
# bins = 30 spells out the value stat_bin() would otherwise pick on its own,
# so the plot is unchanged but the "Pick better value with `binwidth`"
# message is no longer emitted.
ggplot(data = mpg, aes(x = hwy, fill = drv)) +
  geom_histogram(bins = 30, alpha = 0.5) +
  labs(
    title = "Histogram",
    subtitle = "Histogram of Highway Mile Per Gallon",
    caption = "Source: mpg",
    x = "hwy",
    y = "count"
  ) +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same highway-mileage histogram as a column of panels, one row per drv value.
# bins = 30 spells out the value stat_bin() would otherwise pick on its own,
# so the plot is unchanged but the "Pick better value with `binwidth`"
# message is no longer emitted.
ggplot(data = mpg, aes(x = hwy, fill = drv)) +
  geom_histogram(bins = 30, alpha = 0.5) +
  labs(
    title = "Histogram using facet_grid()",
    subtitle = "Histogram of Highway Mile Per Gallon",
    caption = "Source: mpg",
    x = "hwy",
    y = "count"
  ) +
  theme_minimal() +
  facet_grid(drv ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Auto-print the midwest data set to inspect its columns before plotting.
midwest
## # A tibble: 437 x 28
##      PID county state  area poptotal popdensity popwhite popblack popamerindian
##    <int> <chr>  <chr> <dbl>    <int>      <dbl>    <int>    <int>         <int>
##  1   561 ADAMS  IL    0.052    66090      1271.    63917     1702            98
##  2   562 ALEXA~ IL    0.014    10626       759      7054     3496            19
##  3   563 BOND   IL    0.022    14991       681.    14477      429            35
##  4   564 BOONE  IL    0.017    30806      1812.    29344      127            46
##  5   565 BROWN  IL    0.018     5836       324.     5264      547            14
##  6   566 BUREAU IL    0.05     35688       714.    35157       50            65
##  7   567 CALHO~ IL    0.017     5322       313.     5298        1             8
##  8   568 CARRO~ IL    0.027    16805       622.    16519      111            30
##  9   569 CASS   IL    0.024    13437       560.    13384       16             8
## 10   570 CHAMP~ IL    0.058   173025      2983.   146506    16559           331
## # ... with 427 more rows, and 19 more variables: popasian <int>,
## #   popother <int>, percwhite <dbl>, percblack <dbl>, percamerindan <dbl>,
## #   percasian <dbl>, percother <dbl>, popadults <int>, perchsd <dbl>,
## #   percollege <dbl>, percprof <dbl>, poppovertyknown <int>,
## #   percpovertyknown <dbl>, percbelowpoverty <dbl>, percchildbelowpovert <dbl>,
## #   percadultpoverty <dbl>, percelderlypoverty <dbl>, inmetro <int>,
## #   category <chr>
# Turn off scientific notation so the large population values on the y axis
# are printed in full. This is a session-wide setting, so it is issued as its
# own statement rather than being passed to labs(), which only takes labels.
options(scipen = 999)

# County area against total population. Points are coloured by state and
# sized by population density; a smoothing curve without its confidence band
# is drawn over them.
ggplot(data = midwest, aes(x = area, y = poptotal)) +
  # Observations outside these limits are removed before drawing, which is
  # what triggers the "Removed ... rows" warnings.
  scale_x_continuous(limits = c(0, 0.1)) +
  scale_y_continuous(limits = c(0, 500000)) +
  geom_point(aes(color = state, size = popdensity), alpha = 0.4) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Scatterplot",
    subtitle = "Area Vs Population",
    caption = "Source: midwest",
    x = "Area",
    y = "Population"
  ) +
  theme_classic()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

# Engine displacement (displ) against highway mileage (hwy). Only the points
# are coloured by drv, so a single linear-model trend line is fitted across
# all observations.
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = drv)) +
  geom_smooth(method = "lm") +
  labs(
    title = "MPG vs Engine size",
    x = "Engine size",
    y = "MPG"
  )
## `geom_smooth()` using formula 'y ~ x'

# Sepal length against sepal width for the iris data. Species is mapped to
# both point colour and point shape; points are large and semi-transparent.
# labs() receives labels only.
ggplot(data = iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(aes(color = Species, shape = Species), alpha = 0.5, size = 6) +
  labs(
    title = "Scatterplot",
    subtitle = "Sepal.Length Vs Sepal.Width",
    caption = "Source: iris",
    x = "Sepal.Length",
    y = "Sepal.Width"
  ) +
  theme_minimal()

library(gcookbook)
# Height (heightIn) against weight (weightLb) from the heightweight data.
# Both the points and the fitted lines are coloured by sex, so one
# linear-model line is drawn per sex; confidence bands are turned off.
ggplot(data = heightweight, aes(x = heightIn, y = weightLb)) +
  geom_point(aes(color = sex), alpha = 0.5, size = 3) +
  geom_smooth(aes(color = sex), method = "lm", se = FALSE) +
  labs(
    title = "Scatterplot",
    subtitle = "Weight Vs Height",
    caption = "Source: heightweight",
    x = "heightIn",
    y = "weightLb"
  ) +
  theme_classic()
## `geom_smooth()` using formula 'y ~ x'

# Count of vehicles per manufacturer, with each bar split by vehicle class.
ggplot(mpg, aes(manufacturer, fill = class)) +
  geom_bar(width = 0.5) +
  labs(
    title = "Barplot",
    subtitle = "Manufacturer across Vehicle Classes",
    caption = "Source: mpg",
    x = "manufacturer",
    y = "count"
  ) +
  theme_minimal() +
  # Applied after theme_minimal() so the rotated x-axis labels are not reset
  # by the complete theme.
  theme(axis.text.x = element_text(angle = 65, hjust = 1)) +
  scale_fill_brewer(palette = "Spectral")