This is a compilation of basic R utilities.
RBasic1, RBasic2, RBasic3, data.table, dplyr, tidyr, RGIS, Leaflet
1. System functions
# RStudio: clear the console with Ctrl+L
sessionInfo() # R version, platform, and versions of attached/loaded packages
rm(list=ls(all=TRUE)) # Remove every object in the current environment, including hidden (dot-prefixed) ones
rm(list=ls(pattern = "acc")) # Remove only the objects whose names match the regular expression "acc"
gc() # Trigger garbage collection and report memory usage
getwd() # Current working directory (full path)
basename(getwd()) # Name of the current directory (last path component)
dirname(getwd()) # Path of the parent directory
# Setting the working directory: build the path from a project root, then switch to it
PrjDr <- "C:\\Temp"
curDir <- paste0(PrjDr,"\\Tasks\\First\\Second");setwd(curDir)
file.edit('~/.Rprofile') # Open the user-level .Rprofile startup file in an editor
options(rpubs.upload.method = "internal") # Sets the rpubs.upload.method option; presumably affects RPubs publishing from RStudio - confirm
#Function example
# Example function: returns the sum of its three arguments.
#   x, y, z: numeric values (vectors are added element-wise).
# The third formal must be `z`: R rejects a repeated formal argument name,
# and the body refers to `z`.
Mysummary <- function(x, y, z) {
  x + y + z
}
# Size of every object in the current environment, sorted from smallest to largest
sort( sapply(ls(),function(x){object.size(get(x))}))
# Run time: system.time() reports user, system and elapsed time for the expression
system.time(Mysummary(2,5,3))
sapply(df,class) # class of each column of df
.libPaths('C:\\Users\\sidharthanr\\Documents\\R\\win-library\\3.2') # add a user library directory to the library search path
Sys.time() # current date/time
suppressMessages(library(bit64)) # load a library without printing its startup messages
# Use library() to see all packages installed, search() to see all packages currently attached
str(df) # compactly display the structure of an object
# NOTE(review): this `if` has no body on this line; follow it with the statement or { block } to run
if(exists('inpShpFile')) # check whether an object named inpShpFile exists
2. Reading and Writing files
# Reading and writing a DBF file (foreign package)
library(foreign);ind <- read.dbf("Ind_CNS2011.dbf", as.is = FALSE)
write.dbf(ind,"ind_out.dbf")
# Reading an SPSS file: two alternatives (foreign, memisc)
library(foreign);test1 <- read.spss("spss.sav")
library(memisc);test2 <- as.data.set(spss.system.file("spss.sav"))
# Reading a CSV file with a header row
incsv <- read.csv("test.csv",header = TRUE)
# Same, but forcing column types: first column numeric, next two character
incsv <- read.csv("test.csv",header = TRUE,colClasses=c(rep("numeric",1),rep("character",2)))
# read.table equivalent with an explicit separator
incsv <- read.table("test.csv",sep=',',header=TRUE)
# Write comma-separated output without the row-name column
write.csv(incsv, file = "out.csv",row.names=FALSE)
# Write tab-separated output to the same file (overwrites it, since append=FALSE);
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables
write.table(incsv,file="out.csv",append=FALSE,quote=TRUE,sep = '\t',na = "NA",row.names=FALSE,col.names=TRUE)
## returns a vector with one element per line of the file: the number of fields on that line
count.fields("out.csv", sep = "\t")
## largest field count over all lines - useful for sizing colClasses
max(count.fields("out.csv", sep = "\t"))
3. Common operations
colMeans(aq,na.rm = TRUE) # column means; NAs are ignored per column (not by dropping whole rows) - see also colSums
sapply(PSN,class) # get column data type
selNum <- vapply(PERTYPE_march,is.numeric,logical(1)) # logical mask of numeric columns (vapply guarantees a logical vector)
mgd <- merge(ds1, df2[,c("v1","v2")], by = c("v1","v2") ) # Merge two data frames on the key columns v1 and v2
x %% y #modulus (x mod y) 5%%2 is 1
x %/% y #integer division 5%/%2 is 2
identical( AB1, AB2 ) #Check whether two objects are exactly identical
aq <- aq[order(aq$v1, aq$v2),] #Sort DF by v1, then v2
df$sl <- formatC(df$sl,width=2,format="d",flag = "0") # Format an integer, padded with leading zeros to width 2
aq2 <- subset(aq, select = -c(Ozone,Solar.R) ) # Dropping specific columns
aq2 <- aq[aq$Month==5,] # keep only rows where Month is 5 (rows with NA Month come back as all-NA rows)
aq2 <- subset(aq,Month==5) # same filter; subset() drops rows where the condition is NA
paste("str",num,sep="") # concat string and numeric - see also paste0 (default sep="")
paste("create table t",i,sep="") # build a string with a numeric suffix, e.g. a table name
paste0("no", "gap") # pastes with default sep=""
assign(paste0("ds",10),read.csv(paste0("dset",10,".csv"))) # create a variable whose name is built at run time (here ds10)
get("sht57") # opposite of assign/returns the object with that name
strsplit(x, "e") # split strings at a delimiter; returns a list with one character vector per element of x
aq2 <- aggregate(list(mnOzone=aq$Ozone),aq['Month'],mean,na.rm = TRUE) # mean Ozone per Month, ignoring NAs
temp <- avgDistTbl[avgDistTbl$trippurpD %in% c('WBO','OBO','HBU','HBSC'),] # keep rows whose trippurpD is in the given set
# "%in%" <- function(x, table) match(x, table, nomatch = 0) > 0
quantile(tstData,probs = (1:5)/5) # quantiles at 20%, 40%, 60%, 80% and 100%
LTripStr$trippurp[is.na(LTripStr$nStrStps)] <- 2 # set trippurp to 2 where nStrStps is NA (missing)
df[duplicated(df), ] # select duplicated records (second and later occurrences)
feature$ATTR <- factor(feature$ATTR) # R doesn't automatically drop unused factor levels; re-factor to drop them
# Creating an ordered factor with explicit level order (HBW < HBO < NHB);
# the := form assigns the column inside a data.table
tripData[,tripCat:=factor(tripCat,levels=c('HBW','HBO','NHB'),ordered=TRUE)]
# Create a three-way cross tabulation (counts) of PERTYPE x HHVEH x distBand
mytable <- xtabs(~PERTYPE+HHVEH+distBand, data=trpGPSNS3)
xtabs(EST~origCnty+destCnty,wrkFlow) # sums the EST variable for each origCnty x destCnty pair; wrkFlow is the data set name
#Random number related
set.seed(123) # sets the random seed so that results are reproducible
runif(nAlt*nvar*nObs, min=0, max=1) # nAlt*nvar*nObs draws from the uniform distribution on [0, 1]
# Creating formulae dynamically from passed strings or using paste
# f1 holds the first column name of allTemps as a symbol
f1 <- as.name(names(allTemps)[1])
# bquote() splices f1 into the formula template, eval() turns it into a formula;
# fill=-1 is the value used for combinations with no data
allTempsSMRY <- dcast.data.table(allTempsMelt,eval(bquote(.(f1)~variable+scenName)),value.var = 'value',fill=-1)
File Access Functions
#setwd('C:\\Users\\sidharthanr\\Dropbox\\Resource\\Git\\Resource')
# file.info() returns file metadata; sample output is shown below
print(file.info('test.csv'))
## size isdir mode mtime ctime
## test.csv 441 FALSE 666 2015-08-19 07:52:11 2016-09-30 19:14:15
## atime exe
## test.csv 2016-09-30 19:14:15 no
# list.files() with no arguments lists the entries of the current working directory
print(list.files())
## [1] "docs" "dplyr.Rmd" "Leaflet.Rmd" "Numpy.Rmd"
## [5] "out.csv" "Pandas.Rmd" "PyBasic1.html" "PyBasic1.Rmd"
## [9] "RBasic1.Rmd" "RBasic2.Rmd" "RBasic3.Rmd" "RDTable.Rmd"
## [13] "README.md" "RGIS.Rmd" "RMisc.Rmd" "Starter.Rmd"
## [17] "test.csv" "tidyr.Rmd"
Parallel Threading
library(parallel)
# Leave one core free, but always start at least one worker
# (detectCores() can return 1 or NA, which would otherwise give 0 or NA workers)
cl <- makeCluster(max(1L, detectCores() - 1L, na.rm = TRUE))
clusterEvalQ(cl, library("rgdal")) # load the package on every worker
clusterExport(cl, c("tranFileList2")) # copy the file list to every worker
# Read each fixed-width file on the cluster and time the whole operation.
# seq_along() yields an empty index for an empty file list, unlike 1:length(),
# which would produce c(1, 0).
system.time(
  inpData <- parLapply(
    cl,
    seq_along(tranFileList2),
    function(ii) read.fwf(tranFileList2[ii],widths = c(20,5,5,7,7,5,7,7,2,2,7,7,7,7,7))
  )
)
stopCluster(cl) # To stop cluster and free resources
Creating a PDF (probability density function) plot from a vector
plot(density(runif(50))) # plot the kernel density estimate of 50 uniform random draws