1. Basic dplyr functions
filter(flights, month == 1, day == 1) # Keep rows where month == 1 AND day == 1 (comma-separated conditions are combined with AND)
filter(flights, month == 1 | month == 2) # Use | for OR: keep rows where month is 1 or 2
slice(flights, 1:10) # Select rows by position (here rows 1 to 10)
arrange(flights, year, month, day) # Sort rows by year, then month, then day
arrange(flights, desc(arr_delay)) # Use desc() to sort by a column in descending order
select(flights, year, month, day) # Select columns by name
select(flights, year:day) # Select all columns between year and day (inclusive)
select(flights, -(year:day)) # Select all columns except those from year to day (inclusive)
select(flights, tail_num = tailnum) # select() can rename via named arguments, but it drops every column not mentioned
rename(flights, tail_num = tailnum) # rename() keeps all columns; syntax is new_name = old_name (tailnum is the original name)
distinct(flights, tailnum) # Use distinct() to find the unique values of a column
# Add new columns with mutate()
mutate(flights,
  gain = arr_delay - dep_delay,
  speed = distance / air_time * 60)
# mutate() allows you to refer to columns that you have just created:
mutate(flights,
  gain = arr_delay - dep_delay,
  gain_per_hour = gain / (air_time / 60))
# If you only want to keep the new variables, use transmute():
transmute(flights,
  gain = arr_delay - dep_delay,
  gain_per_hour = gain / (air_time / 60))
# Summarise values with summarise(): collapses the table to a single row
summarise(flights,
  delay = mean(dep_delay, na.rm = TRUE))
# Randomly sample rows with sample_n() and sample_frac()
# NOTE: both are superseded in dplyr >= 1.0.0 by slice_sample(n = ) / slice_sample(prop = )
sample_n(flights, 10) # draw a fixed number of rows (10)
sample_frac(flights, 0.01) # draw a fixed fraction of rows (1%)
2. Grouping functions and merge
# Sort by SAMPN and arrtime2, then number the rows within each SAMPN group
# (1, 2, ..., n). row_number() is used instead of 1:n() because 1:n() yields
# c(1, 0) when a group size of 0 is evaluated (e.g. on empty input).
# The result stays grouped by SAMPN; call ungroup() if later steps should not
# operate per group.
Trip <- Trip %>%
  arrange(SAMPN, arrtime2) %>%
  group_by(SAMPN) %>%
  mutate(tripnum = row_number())
# Merging data: a left join keeps every row of S2SData and attaches the
# StationNames columns where S2SData$FromNode equals StationNames$Node
S2SData <- left_join(S2SData, StationNames, by = c("FromNode" = "Node"))
3. Misc
# Example of summarising a dataset inside a loop and renaming a column with a
# name built from a string. rename_() is deprecated, so the dynamic name is
# injected with tidy evaluation: rename(!!new_name := old_name).
for (iloop in 1:5) {
  # NOTE(review): hh_temp is expected to be read for the current iloop before
  # this point; the read step is not shown in this snippet.
  count_col <- paste0("num_hh", iloop)
  hh_temp_summ <- hh_temp %>%
    group_by(maz) %>%
    summarise(num_hh = n()) %>%
    rename(!!count_col := num_hh)
  # Store the summary under a per-iteration name: hhSumm1, hhSumm2, ...
  assign(paste0("hhSumm", iloop), hh_temp_summ)
}
# Apply a function row by row inside mutate(): useful when the function is not
# vectorised element-wise (without rowwise(), max() would collapse the two
# whole columns into a single value)
mutate(rowwise(iris), Max.Len = max(Sepal.Length, Petal.Length))