RBasic1, RBasic2, RBasic3, data.table, dplyr, tidyr, RGIS, Leaflet

1. Basic dplyr functions

library(dplyr)

# Keep rows that satisfy every condition; comma-separated conditions are combined with AND
filter(flights, month == 1, day == 1)
# Keep rows where month is 1 OR 2 (selects the same rows as `month == 1 | month == 2`)
filter(flights, month %in% c(1, 2))
# Select rows by position: here, the first ten rows
slice(flights, 1:10)

# Sort rows by year, then month, then day (ascending)
flights %>% arrange(year, month, day)
# Wrap a column in desc() to sort by it in descending order
flights %>% arrange(desc(arr_delay))

# Keep only the named columns
flights %>% select(year, month, day)
# Keep every column from year through day (inclusive)
flights %>% select(year:day)
# Drop every column from year through day (inclusive)
flights %>% select(-(year:day))
# A named argument renames while selecting (new_name = old_name); only tail_num is kept
flights %>% select(tail_num = tailnum)

# rename() changes the name but keeps all other columns; tailnum is the original name
flights %>% rename(tail_num = tailnum)

# distinct() returns the unique values of the given column(s)
flights %>% distinct(tailnum)

# mutate() appends new columns computed from existing ones
flights %>%
  mutate(
    gain = arr_delay - dep_delay,
    speed = distance / air_time * 60
  )
# Within one mutate() call, later expressions may refer to columns created earlier in that call.
# Use transmute() instead of mutate() to keep only the newly created columns.

# summarise() collapses each group (the whole table when ungrouped) into one row of summary values
flights %>%
  summarise(delay = mean(dep_delay, na.rm = TRUE)) # na.rm = TRUE ignores missing delays

# Draw a random sample of rows: sample_n() by count, sample_frac() by proportion
flights %>% sample_n(10) # exactly 10 rows
flights %>% sample_frac(0.01) # 1% of the rows

2. Grouping functions and merge

# Create a running index within each SAMPN group.
# Rows are sorted by SAMPN and arrtime2 first, so within each group tripnum
# runs 1, 2, 3, ... in arrtime2 order.
# row_number() is used instead of 1:n() because 1:n() evaluates to c(1, 0)
# whenever n() is 0.
# Note: Trip stays grouped by SAMPN afterwards; call ungroup() once the
# grouping is no longer needed.
Trip <- Trip %>%
  arrange(SAMPN, arrtime2) %>%
  group_by(SAMPN) %>%
  mutate(tripnum = row_number())

# Merge StationNames into S2SData, matching S2SData$FromNode against StationNames$Node.
# A left join keeps every row of S2SData; unmatched rows get NA in the added columns.
S2SData <- left_join(S2SData, StationNames, by = c('FromNode' = 'Node'))

3. Misc

# Example: build one summary per loop iteration and rename a column using a
# name constructed from a string.
# NOTE: hh_temp is a placeholder for the data belonging to iteration `iloop`;
# read or select it at the top of the loop body.
for (iloop in 1:5) {
  count_col <- paste0('num_hh', iloop) # e.g. "num_hh1"
  hh_temp_summ <- hh_temp %>%
    group_by(maz) %>%
    summarise(num_hh = n()) %>%
    # `!!count_col :=` supplies the new column name at run time; it replaces
    # the deprecated rename_(.dots = setNames(...)) form.
    rename(!!count_col := num_hh)
  assign(paste0('hhSumm', iloop), hh_temp_summ) # Store as hhSumm1, hhSumm2, ...
}

# rowwise() makes mutate() evaluate its expressions one row at a time, which is
# useful when the function is not vectorised: max() collapses its inputs to a
# single value, so without rowwise() every row would receive the same number.
mutate(rowwise(iris), Max.Len = max(Sepal.Length, Petal.Length))