Rの細かいTipsまとめ（小さいTipsの寄せ集め）

Jan 10, 2018 / Mar 3, 2026 · 7 分で読了 · R Tips ggplot2 ·

共有する:

概要

独立した記事にはならないが、それぞれ便利かつ重要な小さなRのTipsを紹介。

チートシート

ggplot2

`qplot()`

基本

qplot(x=Sepal.Width, y=Sepal.Length, data=iris, geom="point", color=Species) # 散布図
qplot(x=date, y=unemploy, data=economics, geom="line") # 折れ線グラフ
qplot(x=feed, data=chickwts, geom="bar") # 棒グラフ
qplot(x=Sepal.Width, data=iris, binwidth=0.3, geom="histogram") # ヒストグラム
qplot(x=Sepal.Width, data=iris, geom="density") # 密度プロット
qplot(y=Sepal.Length, data=iris, geom="boxplot") # 箱ひげ図
qplot(x=Species, y=Sepal.Length, data=iris, geom="boxplot") # 箱ひげ図xy
qplot(x=Species, y=Sepal.Length, data=iris, geom="violin") # バイオリンプロット

高度なチャート

qplot(x=Sepal.Width, y=Sepal.Length, data=iris, geom=c("point", "smooth"), facets = round(Petal.Width) ~ Species) # 共変量プロット
qplot(x=Species, y=Sepal.Length, data=iris, geom='boxplot', facets = round(Sepal.Width) ~ round(Petal.Width))

見た目をカスタマイズ

qplot(x=Sepal.Width, y=Sepal.Length, data=iris, color=Species, geom="point", xlim=c(0,NA), ylim=c(0,NA), main="散布図の例", xlab="がくの幅", ylab="がくの長さ")

`ggplot()`

基本

p1 <- ggplot(iris, aes(x=Sepal.Width, y=Sepal.Length, color=Species)) + geom_point() # 散布図
p2 <- ggplot(economics, aes(x=date, y=unemploy)) + geom_line() # 折れ線グラフ
p3 <- ggplot(chickwts, aes(x=feed)) + geom_bar() # 棒グラフ
p4 <- ggplot(iris, aes(x=Sepal.Width)) + geom_histogram(binwidth=0.3) # ヒストグラム
p5 <- ggplot(iris, aes(x=Sepal.Width)) + geom_density() # 密度プロット
p6 <- ggplot(iris, aes(y=Sepal.Length)) + geom_boxplot() # 箱ひげ図
p7 <- ggplot(iris, aes(x=Species, y=Sepal.Length)) + geom_boxplot() # 箱ひげ図xy
p8 <- ggplot(iris, aes(x=Species, y=Sepal.Length)) + geom_violin() + geom_boxplot(width=.1, fill="black", outlier.colour=NA) + stat_summary(fun=median, geom="point", fill="white", shape=21, size=2.5) # バイオリンプロット

高度なチャート

ggplot(iris, aes(y=Sepal.Length, x=Sepal.Width)) + geom_point() + geom_smooth() + facet_grid(round(Petal.Width) ~ Species) # 共変量プロット
ggplot(iris, aes(y=Sepal.Length, x=Sepal.Width)) + geom_point() + geom_smooth() + facet_wrap( ~ Species) # 並べるだけ

追加要素

ggplot(iris, aes(x=Sepal.Width, y=Sepal.Length, color=Species)) + geom_point() +
  ggtitle("散布図の例") + xlab("がくの幅") + ylab("がくの長さ") +
  geom_vline(xintercept = 3, color = "red") +
  geom_hline(yintercept = 7, color = "blue") +
  geom_abline(slope = .5, intercept = 4)

複数のチャートをレイアウト

require(patchwork)
(p1+p3)/p2/(p4+p5+p6)/(p7+p8)

並列処理

doParallel/foreachを使った並列処理。最初におまじない。

# Register a parallel backend for foreach's %dopar% via doParallel.
library(doParallel)
library(foreach)
# Number of workers: the core COUNT, not a cluster object.
# detectCores() may return NA on some platforms; fall back to one worker.
cores <- detectCores()
if (is.na(cores)) cores <- 1L
system <- Sys.info()['sysname']
cl <- NULL
if (system == 'Windows') {
    # Windows cannot fork, so start an explicit PSOCK cluster and register it.
    cl <- makeCluster(getOption('cl.cores', cores), type='PSOCK')
    registerDoParallel(cl)
    # Do not call registerDoSEQ() here: it would replace the parallel backend
    # with the sequential one. on.exit() only works inside a function, so call
    # stopCluster(cl) explicitly once all %dopar% loops have finished.
} else {
    # Elsewhere doParallel can use forked workers; only a count is needed.
    options('mc.cores' = cores)
    registerDoParallel(cores)
}

ループ処理

coefs <- foreach(i = 1:47, # ループのインデクス
                 .packages=.packages(),
                 .export=ls(envir=parent.frame()),
                 .combine=rbind # デフォルトで結果はリストに追加していくが、結果が行単位になっている場合で行追加する場合はこのようにする
                 ) %dopar% {
  bsts_model = bsts_models[[i]]
     :
  coef <- data.frame(coef, row.names=NULL)
  coef$varname <- varnames
  coef[coef$varname!='(Intercept)',c(6,7,1:5,8)] # 最後の戻り値が結果のオブジェクトに追記されていく
}

変数を含むコマンドを実行する

1行

.file <- "result"
.path <- "//tsclient/c/home/downloads/"
.str_cmd <- paste0("x <- fread('", .path, .file, ".csv')")
eval(parse(text=.str_cmd))

複数行

.file <- "result"
.path <- "//tsclient/c/home/downloads/"
# Build the read command as a string and evaluate it into the temporary variable .tmp
.str_cmd <- paste0(".tmp <- fread('", .path, .file, ".csv')"); eval(parse(text=.str_cmd))
.tmp <- .tmp |> ... # placeholder for the actual processing (native pipe)
# Copy .tmp back into an object named t_<.file>; the dynamic part is .file
.str_cmd <- paste0(".tmp -> t_", .file); eval(parse(text=.str_cmd))

ベクトル

ベクトルのノルム

# Example matrix; each COLUMN is treated as one vector.
m <- matrix(rnorm(20), ncol = 4)
# Manhattan (L1) norm per column: sum of absolute values.
norm1 <- drop(crossprod(abs(m), rep(1, nrow(m))))
# Euclidean (L2) norm per column: square root of the sum of squares.
norm2 <- sqrt(drop(crossprod(m^2, rep(1, nrow(m)))))

集合

ids1_or_ids2 <- union(ids1, ids2) # 和集合
ids1_and_ids2 <- intersect(ids1, ids2) # 積集合
ids1_minus_ids2 <- setdiff(ids1, ids2) # 差集合（ids1 - ids2）
setequal(ids1, ids2) # 集合として等しいかどうか
id %in% ids # 集合に含まれるかどうか

特定の条件に当てはまるインデックスを抽出

vec2[which(vec1<10)] # vec1<10になるvec2の値
params[which.min(result$loss_mse),] # loss_mseが最小になる行
params[which.max(result$gain_auc),] # gain_aucが最大になる行

オブジェクトが同一かどうかを検証

identical(x1, x2) # exactly the same object (TRUE/FALSE)
# all.equal() returns TRUE or a character description of the differences,
# never FALSE, so wrap it in isTRUE() to get a usable logical.
isTRUE(all.equal(x1, x2)) # equal up to numeric tolerance

サンプリング

ランダムサンプリング

stratified sampling（層化抽出法）

require(sampling)
# `size` is a vector of sample SIZES (counts), one entry per stratum, given in
# the order the strata appear in `data`. It is not a sampling rate per column.
# NOTE(review): sort `data` by the stratification columns first so that the
# order of `size` matches the strata -- confirm against the package docs.
strata(data, stratanames=c('層化に使うカラム1', '層化に使うカラム2'), size=層ごとの抽出件数のベクトル, method=抽出方法)

抽出方法は

'srswor': 非復元ランダムサンプリング（デフォルト）
'srswr': 復元ランダムサンプリング
'poisson': ポアソンサンプリング
'systematic': 系統抽出

Rで文字列をコマンドとして実行する（eval）

Rの繰り返し処理などでコマンドの一部のみを書き換えて同様の処理を行いたい、たとえば対象のデータフレームのみ変えて同じ処理を実行したい場合がある。そういう場合には動的な部分（データフレームオブジェクト名）を変数として含むコマンドの文字列を生成し、その文字列をコマンドとして実行することになる。言葉にするとややこしいが、他の言語でもあるeval処理である。

コマンドが１行の場合

最も単純なケースである。

基本形

ファイル名に動的部分を含むテキストファイル（activity_type1.txt）を読み込み、名前に動的部分を含むテーブル（t_activity_type1）に格納する例

.f <- 'activity_type1'
eval(parse(text=paste0("t_", .f, " <- fread('/path/to/datadir/", .f, ".txt', encoding='UTF-8', sep='\t', na.strings='')")))

.fが動的部分で、ファイル名である。そして動的部分を

.f <- 'activity_type2'

などと変えても同様に実行できる。 eval(parse(text=paste0("固定文字列", 動的部分, "固定文字列"))) が基本になる。paste0()関数がパーツを結合して文字列を生成する関数で、それ以外はおまじないだと思っておけばいい。コマンドの固定文字列部分にクォーテーションマーク'を含む場合、文字列全体はダブルクォーテーションマーク"で囲む。

コマンド部分のみ文字列変数として切り出してソースを見やすくする

この記法だとソースコードの中で動的部分が見えにくく、コマンドの一部を書き換えたり繰り返し利用したりするのに向かない（特にカッコの数がわからなくなる）ので、コマンド部分を変数として取り出したのが以下になる。

.f <- 'activity_type1'
.str_cmd <- paste0("t_", .f, " <- fread('/path/to/datadir/", .f, ".csv', encoding='UTF-8', sep='\t', na.strings='')")
eval(parse(text=.str_cmd))

.str_cmdがコマンドの文字列になる。こうすると

.f <- 'activity_type1'
.str_cmd <- paste0("t_", .f, " <- fread('/path/to/datadir/", .f, ".csv', encoding='UTF-8', sep='\t', na.strings='')")
eval(parse(text=.str_cmd))
.str_cmd <- paste0("t_", .f, " <- t_", .f, " |> mutate(across(ends_with('_date'), as.Date))")
eval(parse(text=.str_cmd))

のように動的コマンドを複数生成して実行しやすくなる。

複数行のコマンドを実行する場合

上記の方法だと繰り返しeval(parse(text=.str_cmd))が必要になってしまう。動的に実行するコマンドが複数行の場合でも毎回必要になり、あまりイケてない。そこで一時変数を生成して、一時変数に対して処理を行う。

.f <- 'activity_type1'
.str_cmd <- paste0(".tmp <- fread('/path/to/datadir/", .f, ".csv', encoding='UTF-8', sep='\t', na.strings='')")
eval(parse(text=.str_cmd)) # .tmpという一時変数に代入
# .tmpに対する処理
.tmp <- .tmp |>
  mutate(across(ends_with('_date'), as.Date)) |>
  mutate(across(ends_with('_type'), as.factor)) |>
  mutate(across(where(is.logical), as.integer))
# .tmpの内容を個別の永続テーブルに入れる
.str_cmd <- paste0(".tmp -> t_", .f); eval(parse(text=.str_cmd))

.tmpに対する処理の部分が動的ではない普通のコマンドとなり、ソースコードの可読性も上がり、再利用やメンテナンスもしやすくなった。シンプルなコマンド１行であれば動的にする必要もあまりないが、複数行のコマンドを繰り返し実行したい場合にはこの方法が力を発揮する。

forループの中で実行する

以上の例を複数のファイルに対して繰り返し実行する。動的な部分をベクトルに代入し、forループの中に先の処理を入れる。

# 動的な部分の定義
filenames <- c('activity_type1', 'activity_type7', 'activity_click', 'event_unsubscribe', 'event_old')
# 繰り返し
for (.f in filenames) {
  # いったんdata.tableオブジェクトとして取り込む
  .str_cmd <- paste0("t_", .f, " <- fread('/path/to/datadir/", .f, ".csv', encoding='UTF-8', sep='\t', na.strings='')")
  eval(parse(text=.str_cmd))
  # 作業用オブジェクトを生成
  eval(parse(text=paste(".tmp <- t_", .f, sep="")))
  .tmp <- .tmp |>
    mutate(across(ends_with('_date'), as.Date)) |>
    mutate(across(ends_with('_type'), as.factor)) |>
    mutate(across(where(is.logical), as.integer))
  .tmp[mail_type %like% '購入お礼', c('mail_type', 'mail_type2', 'mail_type3', 'channel'):=data.table('SAF', '購入お礼', NA, NA)]
  # 作業用オブジェクトから元のオブジェクトに戻す
  eval(parse(text=paste0(".tmp -> t_", .f)))
}

ソースコードがかなり洗練される。

# 条件に合致したオブジェクトに対して一括処理
for (.i in ls(pattern='t1_.*_r_')) {
  .str_cmd <- paste('class(', .i, ') <- "matrix"', sep='')
  eval(parse(text=.str_cmd))
}

エラーが発生しても停止しない（try-catch）

# Keep iterating when a single (campaign, ad size) combination fails.
# try() must wrap each call: wrapped around the whole loop, the first error
# would still abort every remaining iteration.
for (i in unique(creatives.sampled$campaignId)) {
  for (j in adsize.sampled) {
    res <- try(myFunc(i, j, "2016-10-10"))
    # Only store successful results; failures are reported by try() and skipped.
    if (!inherits(res, "try-error")) {
      assign(paste0("result", i, "_", j, "_20161010"), res)
    }
  }
}

オブジェクトの削除、ロード

rm(list=ls(pattern="^result.*")) # remove only the objects whose names match the regular expression
rm(list=ls(all.names=TRUE)) # remove every object, including hidden ones starting with a dot
# Load every image file in the working directory matching the glob.
# list.files() + glob2rx() avoids shelling out to `ls`, so it also works on Windows.
for (f in list.files(pattern=glob2rx("prcomp*.RData"))) load(f)

RのデータフレームとPandasのデータフレームの間でデータをやり取りする

RとPythonの間でのデータをやり取りするとき、CSVファイルではなく変数の型などを内包したデータフレームで直接やりとりできると便利。これを実現するのがApache Arrow形式。従来のfeatherパッケージは非推奨となり、arrowパッケージ（Apache Arrow）に移行済み。Parquet形式も推奨される。

インストール

Rで

install.packages('arrow')

Pythonで

pip install pyarrow

使う

Rで（arrowパッケージ）

require(arrow)
arrow::write_feather(iris, 'iris.feather') # 書き出し（Feather形式）
iris2 <- arrow::read_feather('iris.feather') # 読み込み
arrow::write_parquet(iris, 'iris.parquet') # Parquet形式（推奨、圧縮でファイルサイズ削減）
iris3 <- arrow::read_parquet('iris.parquet') # 読み込み

Pythonで（pyarrow）

import pyarrow as pa
import pyarrow.feather as feather
import pandas as pd
iris_df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv') # prepare the iris dataset
feather.write_feather(pa.Table.from_pandas(iris_df), 'iris.feather') # write (Feather)
# read_feather() already returns a pandas DataFrame, so no .to_pandas() call is
# needed (use feather.read_table() if you want a pyarrow Table instead).
iris_df = feather.read_feather('iris.feather') # read (Feather)
# Parquet format
import pyarrow.parquet as pq
pq.write_table(pa.Table.from_pandas(iris_df), 'iris.parquet')
iris_df = pq.read_table('iris.parquet').to_pandas() # read_table() returns a Table, hence .to_pandas()

python

Rで郵便番号マスタを作成する（データフレーム、data.table両対応）

分析用のデータの中には地域名がほしい形（都道府県名、市区町村名で区切られた形）で含まれておらず、一方で郵便番号が含まれていることもある。その場合郵便番号から地域名を取得することになるが、その都度そのコードを書くのは面倒。しかも郵便番号が更新されることもあるので、常に最新版を持ってくるようにしたい。そこでコピペでそのまま郵便番号マスタを生成できるコードを用意した。日本郵便の郵便番号データから最新版のCSVを取得してきて、郵便番号マスタを生成するものである。これがあればRの中で常に最新版の郵便番号マスタを生成できる。分析用データと結合して地域名を付けることも簡単である。日本郵便の郵便番号データから最新版のCSVを取得する。 CSVファイルに含まれるカラムと形式の詳細は以下参照。 https://www.post.japanpost.jp/zipcode/dl/readme.html 必要なカラムのみ取得すればいい。ここでは郵便番号（3列目）と市区町村コード（1列目）、都道府県名（7列目）、市区町村名（8列目）を取得する。なお郵便番号の形式はハイフンを含まない7ケタの数字列。

データフレームで読み込む場合

require(curl)
temp <- tempfile()
download.file("https://www.post.japanpost.jp/zipcode/dl/kogaki/zip/ken_all.zip", temp)
files <- unzip(temp)
df.zipcode <- read.csv(files[1], na.strings="", header=F, skip=0, fileEncoding = "shift-jis", colClasses = c("character", "NULL", "character", "NULL", "NULL", "NULL", "character", "character", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL"))
unlink(files)
unlink(temp)
rm(files, temp)
colnames(df.zipcode) <- c("citycode","zipcode","prefecture","city")

ちなみにdata.tableを使う場合はこんな感じ。 CSVファイルのエンコーディングがShift-JISでfread()が対応していないため、一度read.csv()でデータフレームとして読み込んでからdata.tableに変換する。

data.tableで読み込む場合

# Build a zipcode master as a data.table from Japan Post's KEN_ALL CSV.
require(curl)
require(data.table)
require(dplyr)
temp <- tempfile()
download.file("https://www.post.japanpost.jp/zipcode/dl/kogaki/zip/ken_all.zip", temp)
files <- unzip(temp)
# Keep only columns 1, 3, 7 and 8; colClasses "NULL" drops the others.
dt.zipcode <- read.csv(files[1], na.strings="", header=F, skip=0, fileEncoding = "shift-jis", colClasses = c("character", "NULL", "character", "NULL", "NULL", "NULL", "character", "character", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL")) |> as.data.table()
unlink(files)
unlink(temp)
rm(files, temp)
# The surviving columns keep their original positions in their names
# (V1, V3, V7, V8), so rename positionally rather than by old names.
setnames(dt.zipcode, c("citycode", "zipcode", "prefecture", "city"))

こんなふうにすれば郵便番号を含むデータから都道府県、市区町村名も紐付けできる。ちなみに元データの郵便番号の形式がハイフンを含む999-9999形式の場合

require(stringr)
require(dplyr)
# Normalize "999-9999" to "9999999", then attach the region names.
# The join key is stated explicitly so that other columns that happen to share
# a name (e.g. an existing `city` column) are not used as keys.
# NOTE(review): the master can hold several rows per zipcode, which duplicates
# rows of dt.data -- deduplicate dt.zipcode first if that matters.
dt.data <- dt.data |>
  mutate(zipcode = str_replace(zipcode, "([0-9]{3})-([0-9]{4})", "\\1\\2")) |>
  left_join(dt.zipcode, by = "zipcode")

モデルの選択

anova(単純なモデル, 複雑なモデル)

のp-valueを見る

圧縮ファイルの読み込み

# Pass shell commands through `cmd=` so fread does not have to guess whether
# the string is a file name, literal data or a command.
fread(cmd = "gzip -dc df.txt")

多変量の可視化

表形式で―`ftable()`

度数で

ftable(Species ~ round(Sepal.Width) + round(Sepal.Length), data = iris)

                                       Species setosa versicolor virginica
round(Sepal.Width) round(Sepal.Length)
2                  4                                1          0         0
                   5                                0          4         1
                   6                                0          9         3
                   7                                0          0         1
                   8                                0          0         0
3                  4                                4          0         0
                   5                               23          2         0
                   6                                0         27        24
                   7                                0          8        14
                   8                                0          0         4
4                  4                                0          0         0
                   5                               17          0         0
                   6                                5          0         0
                   7                                0          0         1
                   8                                0          0         2

比率で

round(prop.table(ftable(Species ~ round(Sepal.Width) + round(Sepal.Length), data = iris), margin=1) *100, 2)

margin=1で行方向の合計＝1、margin=2で列方向の合計＝1とする。
useNA="no"でNAは集計対象から除外、useNA="always"でNAも集計対象に含める。

チャートで条件付散布図と箱ひげ図

条件付散布図

coplot(Sepal.Length ~ Sepal.Width | Species * Petal.Width, data=iris, panel = panel.smooth)

箱ひげ図

boxplot(Sepal.Length ~ Species + round(Petal.Width), data=iris, outline = FALSE)

高速化

apply()は内部でforの処理が入っている→
forが遅いのは、都度オブジェクトを拡張するから。for+rbind()などは最悪。
最初にサイズを決めておくと速くなる。apply()を使うよりも速くなることもある。
ベクトル処理は高速

library(Rmpi)
library(snow)

# All three variants build the same logical matrix: TRUE where a cell equals
# the maximum of its row.

# 1. apply (faster)
# The row-max vector (length nrow) is recycled down the columns, so each cell
# is compared with the maximum of its own row.
mat.matched <- mat.data == apply(mat.data, 1, max)

# 2. for (slower)
# Preallocate the result, then fill it row by row.
mat.matched <- matrix(nrow=nrow(mat.data), ncol=ncol(mat.data))
for (i in seq_len(nrow(mat.data))) {
  mat.matched[i,] <- mat.data[i,] == max(mat.data[i,])
}

# 3. parApply
cl <- makeCluster(4, type="SOCK")
clusterExport(cl, "mat.data")
# parApply() only returns the row maxima; compare against the data to get the
# same logical matrix as variants 1 and 2.
mat.matched <- mat.data == parApply(cl, mat.data, 1, max)
stopCluster(cl)

https://abicky.net/2012/04/25/090953/

blasの変更

RHEL/CentOS/Amazon Linux（yum）

yum install openblas
alternatives --install /usr/lib64/libblas.so.3 libblas.so.3 /usr/lib64/libopenblas.so.0 30
alternatives --install /usr/lib64/libblas.so.3 libblas.so.3 /usr/lib64/libblas.3.2.1 10
alternatives --config libblas.so.3

Ubuntu/Debian（apt）

sudo apt install libopenblas-dev
sudo update-alternatives --config libblas.so.3-x86_64-linux-gnu

MPIのインストール

yum install -y openmpi-devel

事前にコマンドラインで環境変数を設定してから

export LD_LIBRARY_PATH=/usr/lib64/openmpi/lib:$LD_LIBRARY_PATH
R

コンパイル時に3個の引数を渡す

install.packages("Rmpi", configure.args=c("--with-Rmpi-libpath=/usr/lib64/openmpi/lib", "--with-Rmpi-include=/usr/include/openmpi-x86_64", "--with-Rmpi-type=OPENMPI"))
library("Rmpi")
install.packages("snow")
library("snow")

使い方 https://www.sfu.ca/~sblay/R/snow.html

メモリ節約

sparse matrix

要素のほとんどがゼロの行列→sparse matrix。普通のmatrixやデータフレーム形式で扱うと、すべてのセルに対してメモリが割り当てられる。つまり行数×列数だけのメモリが消費される。ログデータを集計したもの（1000万UID×10万アイテム）などではメモリが足りないことに。行数と列数は膨大な割りに、実は値（の入るところ）が少ない→sparse matrix（疎行列）。もっと効率的にメモリを扱えないものか…値の入るセルだけメモリを使えないものか… →これを実現したのが{Matrix}パッケージのsparse matrix形式（dgCMatrixクラスなど）。

`NA`に対する処理

`na.rm`

NAを除外
集計関数（max(), min(), range(), mean(), median(), quantile(), var()）で使用。NAが入っているとデフォルトではNAを返すが、na.rm=TでNAを除外してカウントする
cov(), cor()ではna.rmを指定できないので

# Drop a pair when EITHER value is NA. Filtering x and y separately would
# misalign the pairs or leave vectors of different lengths.
cov(x, y, use = "complete.obs")

とする。

`na.omit`

NAを含む行を除外
データフレームで使う

x <- data.frame(a=1:3, b=c(2,NA,4), c=rep(3,3))
na.omit(x)

`tapply()`より`aggregate()`が使いやすい

aggregate(nExpected ~ category + type + audience + period, data = data1, FUN = sum)

aggregate(
    cbind(imp, click, cv) ~ segmentId + creativeId,
    data = creatives.sampled,
    FUN = sum
)

このようにformulaを使った集計ができるし、アウトプットもデータフレームになる。tapply()だと多次元配列になり扱いにくい。 ※tapply()の戻り値はarray

`save()`と`write.table()`の引数の与え方の違い

似た処理なのに同じ引数の与え方で異なる挙動

Rのサンプルデータのわかりやすい解説

WindowsでR

RとRtoolsはスペースを含まないパスに設置する。 \Program Filesなどはダメ。コンパイルエラーになることがある。 Macなどとの互換性のためにWindowsでは必須。

options(encoding='UTF-8')

Rstanのインストール

Windows

R 4.4以降ではRtools44が現行。R 4.5ではRtools45。パッケージをソースからビルドする場合に必要。

Rtoolsのインストール（R 4.4.xの場合）

Rtools44をCRANからダウンロードしてインストール。デフォルトのインストール先C:\rtools44を使用すれば、Rの標準インストーラで入れたRからは追加設定なしでパッケージをビルドできる。

R 4.0系など古いRを使う場合、Rtools40をインストールしたうえで、PATHに${RTOOLS40_HOME}\usr\binを追加する必要がある。以下を実行すると起動時に読み込む.Renvironを生成してくれる。

# Append to ~/.Renviron instead of overwriting it, so existing entries survive.
write('PATH="${RTOOLS40_HOME}\\usr\\bin;${PATH}"', file = "~/.Renviron", append = TRUE)

text

確認

Sys.which("make")

Rstanのインストール

rstanはCRANからバイナリで正常にインストールできる。まずは以下を試す。

install.packages("rstan", repos = "https://cloud.r-project.org/", type = "win.binary")

text

ソースからビルドが必要な場合（バイナリが利用できないRのバージョンなど）は、Rtoolsを正しく設定したうえでtype = "source"でインストールする。

CmdStanR（推奨の代替）

Stanを使うもう一つの選択肢としてCmdStanRがある。rstanより軽量で、CmdStanをバックエンドとして使う。インストールは以下。

install.packages("cmdstanr", repos = c("https://stan-dev.r-universe.dev", getOption("repos")))

text

その後、CmdStan本体をインストールする。

cmdstanr::install_cmdstan(cores = 2)

パッケージ管理

# そのセッション内では永続的に
options(Ncpus=4) # 4コアを使って同時に複数パッケージをコンパイル
options(repos = c(CRAN = "https://packagemanager.posit.co/cran/YYYY-MM-DD")) # 特定の日付を指定
options(repos = c("https://mc-stan.org/r-packages/", getOption("repos"))) # レポジトリの追加

# For a single install only
## Install from an explicitly chosen repository
# MRAN snapshots have been retired; use a dated Posit Package Manager snapshot instead.
install.packages("xml2", repos = c("https://packagemanager.posit.co/cran/YYYY-MM-DD")) # pin to a specific date
install.packages("cmdstanr", repos = c("https://stan-dev.r-universe.dev", getOption("repos"))) # add a repository

## ソースかバイナリか
install.packages("xml2", repos = c("https://mc-stan.org/r-packages/"), type='source') # ソースからコンパイル
install.packages("xml2", repos = c("https://mc-stan.org/r-packages/"), type='win.binary') # バイナリ

## コンパイル時のオプションの指定
install.packages("xml2", configure.vars = "INCLUDE_DIR=/usr/include/libxml2/libxml LIB_DIR=/usr/lib/x86_64-linux-gnu")
install.packages("xml2", configure.args = '--enable-aaa --bbb=bb')

バージョン指定

devtools::install_version("rstan", version = "2.18.1", repos = "https://cloud.r-project.org/")

text

パッケージを保持してRをアップグレード

R自体をアップグレードすると、古いバージョンで使っていたパッケージが削除されてしまう。それを防ぐ方法。

Rをアップグレードする前に現在のパッケージの一覧を一時ファイルに保存しておく

tmp <- installed.packages()
installedpkgs <- as.vector(tmp[is.na(tmp[,"Priority"]), 1])
save(installedpkgs, file="installed_old.rda")

Rの新しいバージョンをインストールする
新しいバージョンのRを起動し、先に保存した古いパッケージ一覧を読み込んでCRANから再インストール

# Run in the NEW R version.
# Restore `installedpkgs` (the package list saved from the old version);
# without this load() the object does not exist in the fresh session.
load("installed_old.rda")
tmp <- installed.packages()
# Non-base packages currently installed (Priority is NA for them).
installedpkgs.new <- as.vector(tmp[is.na(tmp[,"Priority"]), 1])
# Packages present in the old installation but missing from the new one.
missing <- setdiff(installedpkgs, installedpkgs.new)
install.packages(missing)
update.packages()

BioConductorのパッケージを使っている場合、追加で以下を実行。biocLite()は廃止済み。BiocManager::install()を使用する。

if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
load("installed_old.rda")
tmp <- installed.packages()
installedpkgs.new <- as.vector(tmp[is.na(tmp[,"Priority"]), 1])
missing <- setdiff(installedpkgs, installedpkgs.new)
BiocManager::install(missing)

https://www.r-bloggers.com/how-to-upgrade-r-without-losing-your-packages/

Rの細かいTipsまとめ（小さいTipsの寄せ集め）

概要

チートシート

ggplot2

qplot()

ggplot()

並列処理

変数を含むコマンドを実行する

ベクトル

ベクトルのノルム

集合

特定の条件に当てはまるインデックスを抽出

オブジェクトが同一かどうかを検証

サンプリング

ランダムサンプリング

stratified sampling（層化抽出法）

Rで文字列をコマンドとして実行する（eval）

コマンドが１行の場合

基本形

コマンド部分のみ文字列変数として切り出してソースを見やすくする

複数行のコマンドを実行する場合

forループの中で実行する

エラーが発生しても停止しない（try-catch）

オブジェクトの削除、ロード

RのデータフレームとPandasのデータフレームの間でデータをやり取りする

インストール

使う

Rで郵便番号マスタを作成する（データフレーム、data.table両対応）

データフレームで読み込む場合

data.tableで読み込む場合

モデルの選択

圧縮ファイルの読み込み

多変量の可視化

表形式で―ftable()

チャートで条件付散布図と箱ひげ図

高速化

blasの変更

MPIのインストール

メモリ節約

sparse matrix

NAに対する処理

na.rm

na.omit

tapply()よりaggregate()が使いやすい

save()とwrite.table()の引数の与え方の違い

Rのサンプルデータのわかりやすい解説

WindowsでR

Rstanのインストール

Windows

Rtoolsのインストール（R 4.4.xの場合）

Rstanのインストール

CmdStanR（推奨の代替）

パッケージ管理

バージョン指定

パッケージを保持してRをアップグレード

`qplot()`

`ggplot()`

表形式で―`ftable()`

`NA`に対する処理

`na.rm`

`na.omit`

`tapply()`より`aggregate()`が使いやすい

`save()`と`write.table()`の引数の与え方の違い