#Nathaniel Mark
#Recitation January 27, 2017
#Intro to R
#A Note: For this recitation, I assume we are using RStudio.
#You should use RStudio, as it is much easier for beginners.
#If you want to learn more about using R without RStudio, see the
#Professor's R notes.
#PART I: Basics of writing up an R Script
#A # at the start of a line turns it into a comment: R skips that line when the code is run.
#In RStudio, many data files can be loaded by clicking
#Import Dataset>auto.dta (in the box to the right).
#The data we are using in this exercise is a Stata .dta file, however, so we have to be a little fancier.
#read.dta() lives in the "foreign" package (normally installed together with R).
#The package must be loaded first, otherwise R cannot find the function.
library(foreign)
auto <- read.dta("C:/users/ndm2125/Downloads/auto.dta")
#Another way is to set the working directory to be a certain folder, then load
#the file from that folder using only its file name.
#(Change both paths to wherever auto.dta is saved on your own computer.)
setwd("C:/users/ndm2125/Downloads/")
auto <- read.dta("auto.dta")
#read.dta() generates what is called a dataframe. It is a collection of named vectors (i.e. variables)
#Variables can be obtained in two ways:
#1) Name the dataframe and the variable, separated by $:
auto$price
#2) Use with(), which evaluates an expression "inside" the dataframe, so the
#   variable name can be written on its own:
with(auto, price)
#You will also see attach(auto) ... detach(auto) used for this. Be Careful!
#attach() changes where R looks up names for every line that follows, and an
#object called price in your own workspace would be found before auto's column.
#with() only affects the single expression it is given, so there is nothing to
#undo afterwards and nothing to forget to detach.
#Two quick ways to describe the dataframe are str() and head().
#str() gives us, for each variable: the variable name, type and the first few data points.
#(give.attr = FALSE is optional; it leaves the attributes out of the printout.)
#Also note, this information is in the environment window in RStudio.
str(object = auto, give.attr = FALSE)
#head() prints the first few rows, giving a look at actual values of each variable.
head(x = auto)
#PART III: Analyzing the raw data
#The simplest tool is summary(). Given the whole dataframe, it prints summary
#statistics for each variable that is numeric.
summary(object = auto)
#Given a single variable, it prints summary statistics for just that variable:
summary(object = auto$mpg)
#More specific summary statistics can be found with describe():
#This first call is meant to fail. describe() is not part of base R, so unless a
#package providing it is already loaded, R reports that it could not find the function.
#try() prints that error but lets the rest of the script keep running when the
#whole file is run at once.
try(describe(auto$mpg))
#ERROR! This is to remind you about packages. Load the package needed!
??describe #searching for functions in a non-loaded package
#Install psych only if it is missing, so re-running the script does not
#download and reinstall the package every time.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library(psych)
describe(auto$mpg)
#Subsets: subset(x, subset) keeps the elements of x for which the condition is TRUE.
#Say we only want to see the MPG of foreign cars:
subset(x = auto$mpg, subset = auto$foreign == "Foreign")
#Or the MPG of cars that weigh 3500 pounds or more:
subset(x = auto$mpg, subset = auto$weight >= 3500)
#We can also take a subset of the entire dataframe, if we only want to keep a few variables:
NewDataFrame