R入門1
Page content
Rの入門
# modulus operator (remainder of 5 divided by 2)
print(5 %% 2)
[1] 1
要素は偶数個なので、medianは昇順に並べたときの真ん中の二つの数値(6と7)の平均を取る
median(c(3, 7, 6, 10, 3, 7))
[1] 6.5
var <- 10.
print(var^2)
print(var/3)
[1] 100
[1] 3.333333
lang.domain <- "statistics"
lang.domain <- toupper(lang.domain)
print(lang.domain)
[1] "STATISTICS"
# substitutes "I" for every "i"
lang.domain <- "statistics"
print(gsub("i", "I", lang.domain) )
[1] "statIstIcs"
lang.domain <- "statistics"
print(substr(lang.domain, 1,4))
[1] "stat"
# Combine character strings. paste() already inserts its separator (a single
# space by default) between arguments, so the pieces carry no padding of
# their own; a trailing space in "R does " would produce a double space.
lang.domain <- "statistics"
greeting <- paste("R does", lang.domain, "!!!")
print(greeting)
[1] "R does statistics !!!"
if ((2+2==4) && (2*2==4)) {
print("very good")
}
[1] "very good"
# gsub help page
?gsub
# gsubの例を出す
example(gsub)
# Replace the first value of a vector (R indexes from 1); the other elements
# are left untouched.
our.vect <- c(8, 2, 3)
our.vect[1] <- 9
print(our.vect)
[1] 9 2 3
our.vect <- c(8, 2, 3)
print(length(our.vect))
[1] 3
our.vect <- c(8, 2, 3)
# 二個目の要素、一個目の要素、3個目の要素
print(our.vect[c(2,1,3)] )
[1] 2 8 3
other.vector <- 1:5
print(other.vector)
[1] 1 2 3 4 5
# 10と0は含まれる
another.vector <- seq(10,0, by=-2)
print(another.vector)
[1] 10 8 6 4 2 0
another.vector <- c(5,4,3,2,1)
# 2から4までで要素を取得
print(another.vector[2:4])
[1] 4 3 2
print(max(1:10))
print(min(1:10))
print(sum(1:10))
print(mean(1:10))
# standard deviation
print(sd(1:10))
[1] 10
[1] 1
[1] 55
[1] 5.5
[1] 3.02765
# A numeric vector containing one missing value (NA).
messy.vector <- c(2,4,NA,4)
print(sum(messy.vector)) # NA propagates: the sum of a vector holding an NA is NA
print(sum(messy.vector, na.rm=TRUE)) # na.rm=TRUE drops the NA before summing
print(sum(messy.vector, na.rm=FALSE)) # NA again, same result as the first call
[1] NA
[1] 10
[1] NA
messy.vector <- c(2,4,NA,4)
# NAであるかどうかチェック
print(is.na(messy.vector))
[1] FALSE FALSE TRUE FALSE
messy.vector <- c(2,4,NA,4)
# get a count of the number of NA
print(sum(is.na(messy.vector)) )
[1] 1
our.vect <- c(8, 2, 3)
# 8だけ5より大きい
print(our.vect > 5)
print(typeof(our.vect > 5))
print(sum(our.vect>5))
[1] TRUE FALSE FALSE
[1] "logical"
[1] 1
messy.vector <- c(2,4,NA,4)
# NAでない要素を抽出
print(messy.vector[!is.na(messy.vector)])
[1] 2 4 4
messy.vector <- c(2,4,NA,4)
# NAのところ0で埋める
messy.vector[is.na(messy.vector)] <- 0
print(messy.vector)
[1] 2 4 0 4
# extract every other element (the logical index c(TRUE, FALSE) is recycled)
our.vect <- c(8, 2, 3)
print(our.vect[c(TRUE, FALSE)])
[1] 8 3
# 関数の作り方
#' Test whether one number divides evenly into another.
#'
#' Works elementwise, so either argument may be a vector (the usual R
#' recycling rules apply).
#'
#' @param large.number Numeric dividend(s).
#' @param smaller.number Numeric divisor(s).
#' @return A logical vector: TRUE where `large.number` leaves no remainder
#'   when divided by `smaller.number`, FALSE otherwise.
is.divisible.by <- function(large.number, smaller.number) {
  # %% is vectorized, so a single comparison replaces the scalar-only
  # if/return pair and also handles whole vectors.
  large.number %% smaller.number == 0
}
#' Test whether a number is even.
#'
#' @param num Value to test; handed to is.divisible.by() as the dividend.
#' @return Whatever is.divisible.by() returns for a divisor of 2.
is.even <- function(num) {
  divisor <- 2
  return(is.divisible.by(large.number = num, smaller.number = divisor))
}
our.vect <- c(8, 2, 3, 6)
print(sapply(our.vect, is.even))
# sapplyの第二引数に無名関数を直接渡すこともできる
print( sapply(our.vect, function(num){is.divisible.by(num, 3)}) )
[1] TRUE TRUE FALSE TRUE
[1] FALSE FALSE TRUE TRUE
# 上で定義した関数を利用して、要素を抽出
our.vect <- c(8, 2, 3, 6)
where.even <- sapply(our.vect, is.even)
where.div.3 <- sapply(our.vect, function(num){
is.divisible.by(num, 3)
})
our.vect[where.even & where.div.3]
[1] 6
# R has the matrix data structures
a.matrix <- matrix(c(2,3,4,5))
print(a.matrix)
[,1]
[1,] 2
[2,] 3
[3,] 4
[4,] 5
a.matrix <- matrix(c(2,3,4,5), ncol=2)
print(a.matrix)
[,1] [,2]
[1,] 2 4
[2,] 3 5
# bind two vectors
# column bind
a2.matrix <- cbind(c(2,3,4), c(4,5,6))
print(a2.matrix)
[,1] [,2]
[1,] 2 4
[2,] 3 5
[3,] 4 6
# row bind
a3.matrix <- rbind(c(2,3,4), c(4,5,6))
print(a3.matrix)
[,1] [,2] [,3]
[1,] 2 3 4
[2,] 4 5 6
a3.matrix <- rbind(c(2,3,4), c(4,5,6))
# matrix transposition
print(t(a3.matrix))
[,1] [,2]
[1,] 2 4
[2,] 3 5
[3,] 4 6
a3.matrix <- rbind(c(2,3,4), c(4,5,6))
print(a3.matrix)
# if vectors have sapply(), then matrices have apply()
# 1 for row
print(apply(a3.matrix, 1, sum) )
# 2 for column
print(apply(a3.matrix, 2, mean) )
[,1] [,2] [,3]
[1,] 2 3 4
[2,] 4 5 6
[1] 9 15
[1] 3 4 5
Data frameのwhen変数に入っている「10/2」のような文字列を日付型(Date)に変換する
# Parse `when` with the month/day format "%m/%d". The format carries no year,
# so as.Date() fills in the current year; the 2018 in the example below
# presumably reflects when this was originally run.
mutate(df, when = as.Date(when, "%m/%d")) # yields a Date such as 2018-10-02