data management
2015-10-14 22:52:02 4 举报
AI智能生成
R basic data management
作者其他创作
大纲/内容
Missing Values
Checking for missing values
is.na()
Recoding values to missing
leadership$age[leadership$age == 99] <- NA
Excluding missing values from analyses
na.omit(leadership)
Date Values
Converting character strings to dates
strDates <- c("01/05/1965", "08/16/1975")
dates <- as.Date(strDates, "%m/%d/%Y")
Utilities (current date and date formatting)
today <- Sys.Date()
format(today, format="%B %d %Y")
Type Conversions
Test Type
is.numeric()
is.character()
is.vector()
is.matrix()
is.data.frame()
is.factor()
is.logical()
Convert Type
as.numeric()
as.character()
as.vector()
as.matrix()
as.data.frame()
as.factor()
as.logical()
Sorting Data
newdata <- leadership[order(leadership$age),]
attach(leadership)
newdata <- leadership[order(gender, age),]
detach(leadership)
attach(leadership)
newdata <-leadership[order(gender, -age),]
detach(leadership)
Merging datasets
Adding columns
Join By Key(s)
total <- merge(dataframeA, dataframeB, by="ID")
total <- merge(dataframeA, dataframeB, by=c("ID","Country"))
Without a common key (both objects must have the same number of rows, sorted in the same order)
total <- cbind(A, B)
Adding rows (both data frames must have the same variables)
total <- rbind(dataframeA, dataframeB)
Subsetting datasets
Selecting (keeping) variables
newdata <- leadership[, c(6:10)]
myvars <- c("q1", "q2", "q3", "q4", "q5")
newdata <-leadership[myvars]
myvars <- paste("q", 1:5, sep="")
newdata <- leadership[myvars]
Excluding (dropping) variables
myvars <- names(leadership) %in% c("q3", "q4")
newdata <- leadership[!myvars]
newdata <- leadership[c(-8,-9)]
leadership$q3 <- leadership$q4 <- NULL
Selecting observations
newdata <- leadership[1:3,]
newdata <- leadership[which(leadership$gender=="M" &
leadership$age > 30),]
attach(leadership)
newdata <- leadership[which(gender=='M' & age > 30),]
detach(leadership)
The subset() function
newdata <- subset(leadership, age >= 35 | age < 24,
select=c(q1, q2, q3, q4))
newdata <- subset(leadership, gender=="M" & age > 25,
select=gender:q4)
Random samples
mysample <- leadership[sample(1:nrow(leadership), 3, replace=FALSE),]
Using SQL statements to manipulate data frames
> library(sqldf)
> newdf <- sqldf("select * from mtcars where carb=1 order by mpg",
row.names=TRUE)
> newdf
Valiant 18.1 6 225.0 105 2.76 3.46 20.2 1 0 3 1
0 条评论
下一页