R入門1

2018-10-23 r

Page content

Rの入門

# modulus operator (remainder of 5 divided by 2)
print(5 %% 2)

[1] 1

要素は偶数個なので、medianは真ん中の二つの数字の平均を取る

# sorted values are 3 3 6 7 7 10, so the median is (6 + 7) / 2
median(c(3, 7, 6, 10, 3, 7))

[1] 6.5

# `10.` is the numeric (double) literal 10; `^` is exponentiation
var <- 10.
print(var^2)
print(var/3)

[1] 100
[1] 3.333333

# toupper() converts the whole string to upper case
lang.domain <- "statistics"
lang.domain <- toupper(lang.domain)
print(lang.domain)

[1] "STATISTICS"

# replace every "i" with "I"
lang.domain <- "statistics"
print(gsub("i", "I", lang.domain) )

[1] "statIstIcs"

lang.domain <- "statistics"
# extract characters 1 through 4
print(substr(lang.domain, 1,4))

[1] "stat"

# combines character strings; paste() inserts its default separator (a space)
# between arguments, so the trailing space in "R does " yields a double space
lang.domain <- "statistics"
print(paste("R does ", lang.domain, "!!!"))

[1] "R does  statistics !!!"

# `&&` is the scalar logical AND: the body runs only when both comparisons hold
if ((2+2==4) && (2*2==4)) {
    print("very good")
}

[1] "very good"

# gsub help page
?gsub

 # run the examples from the gsub help page
example(gsub)

# replace the first value of a vector (indexing in R starts at 1)
our.vect <- c(8, 2, 3)
our.vect[1] <- 9
print(our.vect)

[1] 9 2 3

our.vect <- c(8, 2, 3)
# number of elements in the vector
print(length(our.vect))

[1] 3

our.vect <- c(8, 2, 3)
# index with a vector of positions: 2nd element, 1st element, then 3rd element
print(our.vect[c(2,1,3)] )

[1] 2 8 3

# the colon operator builds the sequence 1, 2, ..., 5
other.vector <- 1:5
print(other.vector)

[1] 1 2 3 4 5

# count down from 10 to 0 in steps of 2; both endpoints (10 and 0) are included
another.vector <- seq(10,0, by=-2)
print(another.vector)

[1] 10  8  6  4  2  0

another.vector <- c(5,4,3,2,1)
# take the elements at positions 2 through 4
print(another.vector[2:4])

[1] 4 3 2

# summary statistics of the integers 1 to 10
print(max(1:10))
print(min(1:10))
print(sum(1:10))
print(mean(1:10))
# standard deviation
print(sd(1:10))

[1] 10
[1] 1
[1] 55
[1] 5.5
[1] 3.02765

messy.vector <- c(2,4,NA,4)
print(sum(messy.vector)) # NA propagates: any NA in the input makes the sum NA
print(sum(messy.vector, na.rm=TRUE)) # drop the NA values before summing
print(sum(messy.vector, na.rm=FALSE)) # same as the first call: the result is NA again

[1] NA
[1] 10
[1] NA

messy.vector <- c(2,4,NA,4)
# test each element for NA; the result is a logical vector
print(is.na(messy.vector))

[1] FALSE FALSE  TRUE FALSE

messy.vector <- c(2,4,NA,4)
# count the NA values: each TRUE from is.na() counts as 1 when summed
print(sum(is.na(messy.vector)) )

[1] 1

our.vect <- c(8, 2, 3)
# elementwise comparison: only 8 is greater than 5
print(our.vect > 5)
# the comparison result is a logical vector
print(typeof(our.vect > 5))
# summing a logical vector counts its TRUE values
print(sum(our.vect>5))

[1]  TRUE FALSE FALSE
[1] "logical"
[1] 1

messy.vector <- c(2,4,NA,4)
# keep only the elements that are not NA
print(messy.vector[!is.na(messy.vector)])

[1] 2 4 4

messy.vector <- c(2,4,NA,4)
# replace the NA entries with 0
messy.vector[is.na(messy.vector)] <- 0
print(messy.vector)

[1] 2 4 0 4

# extract every other element: the logical index c(TRUE, FALSE) is shorter
# than the vector, so it is recycled (TRUE, FALSE, TRUE) across the positions
our.vect <- c(8, 2, 3)
print(our.vect[c(TRUE, FALSE)])

[1] 8 3

# How to define a function.
# Returns TRUE when `large.number` divided by `smaller.number` leaves a
# remainder of zero, and FALSE otherwise.
is.divisible.by <- function(large.number, smaller.number) {
    remainder <- large.number %% smaller.number
    if (remainder == 0) {
        # return() is called like a function, so its value goes in parentheses
        return(TRUE)
    }
    return(FALSE)
}

# TRUE when `num` is divisible by 2; delegates to is.divisible.by()
is.even <- function(num) {
    return(is.divisible.by(large.number = num, smaller.number = 2))
}

our.vect <- c(8, 2, 3, 6)
# sapply() calls is.even on each element and collects the results into a vector
print(sapply(our.vect, is.even))

# the function given as sapply's second argument can also be written inline
print( sapply(our.vect, function(num){is.divisible.by(num, 3)})  )

[1]  TRUE  TRUE FALSE  TRUE
[1] FALSE FALSE  TRUE  TRUE

# use the functions defined above to build logical masks and extract elements
our.vect <- c(8, 2, 3, 6)
where.even <- sapply(our.vect, is.even)
where.div.3 <- sapply(our.vect, function(num){
    is.divisible.by(num, 3)
})

# keep the elements that are both even and divisible by 3 (elementwise `&`);
# print() explicitly so the result also shows when the code is run via source()
print(our.vect[where.even & where.div.3])

# R has a matrix data structure; with no dimensions given,
# matrix() puts the values into a single column
a.matrix <- matrix(c(2,3,4,5))
print(a.matrix)

     [,1]
[1,]    2
[2,]    3
[3,]    4
[4,]    5

# ncol = 2 gives a 2 x 2 matrix; the values are filled in column by column
a.matrix <- matrix(c(2,3,4,5), ncol=2)
print(a.matrix)

     [,1] [,2]
[1,]    2    4
[2,]    3    5

# bind two vectors
# column bind: each vector becomes one column of the matrix
a2.matrix <- cbind(c(2,3,4), c(4,5,6))
print(a2.matrix)

     [,1] [,2]
[1,]    2    4
[2,]    3    5
[3,]    4    6

# row bind: each vector becomes one row of the matrix
a3.matrix <- rbind(c(2,3,4), c(4,5,6))
print(a3.matrix)

     [,1] [,2] [,3]
[1,]    2    3    4
[2,]    4    5    6

a3.matrix <- rbind(c(2,3,4), c(4,5,6))
# matrix transposition: the 2 x 3 matrix becomes 3 x 2
print(t(a3.matrix))

     [,1] [,2]
[1,]    2    4
[2,]    3    5
[3,]    4    6

a3.matrix <- rbind(c(2,3,4), c(4,5,6))
print(a3.matrix)

# if vectors have sapply(), then matrices have apply()
# second argument 1: apply the function to each row
print(apply(a3.matrix, 1, sum) ) 
# second argument 2: apply the function to each column
print(apply(a3.matrix, 2, mean) )

     [,1] [,2] [,3]
[1,]    2    3    4
[2,]    4    5    6
[1]  9 15
[1] 3 4 5

Data frameのwhen列に入っている「10/2」のような文字列を日付（Date型）に変換する

# Parses month/day strings into Date values. The format carries no year, so
# as.Date() fills in the current year (e.g. 2018-10-02 when this was written).
# NOTE(review): mutate() is presumably dplyr's, and `df` is defined elsewhere;
# no library() call is visible here, and the result is not assigned - confirm.
mutate(df, when = as.Date(when, "%m/%d"))