Read in data from a URL using the readr package. Make sure the permalink points to the raw data on GitHub (a raw.githubusercontent.com address), not to the rendered HTML page.
# Fetch the CSV from its raw GitHub address and parse it with readr.
# Note: despite its name, `url` holds the parsed data, not the address.
url <- read_csv(
  "https://raw.githubusercontent.com/reilly-lab/reilly-lab.github.io/master/BoyGirl.csv",
  col_names = TRUE  # first row of the file supplies the column names
)
# Display the parsed table.
print(url)
## # A tibble: 3 x 2
## boy girl
## <int> <int>
## 1 1 4
## 2 2 5
## 3 3 6
N=50 items sampled from a random normal distribution with a mean of 50, sd=5
# Fix the RNG state so the draws below are identical on every run.
set.seed(123)
# Draw 50 values from a normal distribution with mean 50 and sd 5.
vec <- rnorm(n = 50, mean = 50, sd = 5)
# Show the first five draws.
head(vec, n = 5)
## [1] 47.19762 48.84911 57.79354 50.35254 50.64644
Create a dataframe with three columns, populate it with numbers from a random normal distribution (mean=0, sd=1), change it to a tibble, and view the first six rows (the default for head()).
# Three columns of 10 standard-normal draws each (drawn in the order
# a, b, c), assembled as a data frame and then converted to a tibble.
dat <- as_tibble(
  data.frame(
    a = rnorm(10),
    b = rnorm(10),
    c = rnorm(10)
  )
)
# head() without `n` shows the first six rows.
head(dat)
## # A tibble: 6 x 3
## a b c
## <dbl> <dbl> <dbl>
## 1 0.253 0.380 -0.491
## 2 -0.0285 -0.502 -2.31
## 3 -0.0429 -0.333 1.01
## 4 1.37 -1.02 -0.709
## 5 -0.226 -1.07 -0.688
## 6 1.52 0.304 1.03
Here’s another dataframe, created with the replicate and sample functions. The signature of sample is: sample(x, size, replace = FALSE)
# Seed the RNG so the sampled values are reproducible.
set.seed(190)
# Two columns, each holding 100 integers drawn with replacement from 0..100.
dat.other <- data.frame(
  replicate(
    n = 2,
    expr = sample(x = 0:100, size = 100, replace = TRUE)
  )
)
# Show the first five rows.
head(dat.other, n = 5)
## X1 X2
## 1 69 18
## 2 71 88
## 3 6 92
## 4 93 100
## 5 91 99
generate a vector of letters that will serve as a grouping variable and bind it to dat
# Grouping labels: five "a" followed by five "b".
letvec <- rep(c("a", "b"), each = 5)
# Column-bind the labels onto dat; the new column is named `letvec`.
mat.b <- cbind(dat, letvec)
head(mat.b)
## a b c letvec
## 1 0.25331851 0.3796395 -0.4910312 a
## 2 -0.02854676 -0.5023235 -2.3091689 a
## 3 -0.04287046 -0.3332074 1.0057385 a
## 4 1.36860228 -1.0185754 -0.7092008 a
## 5 -0.22577099 -1.0717912 -0.6880086 a
## 6 1.51647060 0.3035286 1.0255714 b
# Print a compact summary of mat.b: its class, size, and each column's type.
str(mat.b)
## 'data.frame': 10 obs. of 4 variables:
## $ a : num 0.2533 -0.0285 -0.0429 1.3686 -0.2258 ...
## $ b : num 0.38 -0.502 -0.333 -1.019 -1.072 ...
## $ c : num -0.491 -2.309 1.006 -0.709 -0.688 ...
## $ letvec: Factor w/ 2 levels "a","b": 1 1 1 1 1 2 2 2 2 2
Rename a column in the dataframe we just created, from “a” to “SomethingElse”.
# Rename column `a` to `SomethingElse`; the remaining columns are unchanged.
new.dat <- rename(dat, SomethingElse = a)
# Show the first five rows.
head(new.dat, n = 5)
## # A tibble: 5 x 3
## SomethingElse b c
## <dbl> <dbl> <dbl>
## 1 0.253 0.380 -0.491
## 2 -0.0285 -0.502 -2.31
## 3 -0.0429 -0.333 1.01
## 4 1.37 -1.02 -0.709
## 5 -0.226 -1.07 -0.688
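Melt the dataframe from wide to long format using the %>% pipe (from magrittr, re-exported by dplyr), then rename the column headers of the melted result to “group” and “iq”.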
# Reshape wide -> long: stack columns 1 to 3 into one value column named
# "iq", with a "group" column recording which source column each value
# came from.
melt.dat <- melt(
  new.dat,
  measure.vars = 1:3,
  variable.name = "group",
  value.name = "iq"
)
head(melt.dat)
## group iq
## 1 SomethingElse 0.25331851
## 2 SomethingElse -0.02854676
## 3 SomethingElse -0.04287046
## 4 SomethingElse 1.36860228
## 5 SomethingElse -0.22577099
## 6 SomethingElse 1.51647060
Generate a new dataframe isolating only groups “b” and “c” using the filter command in dplyr. Drop the ‘SomethingElse’ observations, then re-create the grouping factor so that the now-unused ‘SomethingElse’ level is removed.
# Keep only the rows whose group is not "SomethingElse".
some.dat <- filter(melt.dat, group != "SomethingElse")
# Rebuild the factor so the level that no longer occurs is dropped.
some.dat[["group"]] <- factor(some.dat[["group"]])
# Confirm which levels remain.
levels(some.dat[["group"]])
## [1] "b" "c"
Populate a 3x3 matrix with nine random numbers (matrix() fills column by column unless byrow=TRUE is given). Set the seed for reproducibility, then name the rows and columns.
# Seed the RNG so the nine draws are reproducible.
set.seed(1234)
# 3 x 3 matrix of standard-normal draws, filled column by column (the
# matrix() default), with rows named a-c and columns named d-f.
mat <- matrix(
  rnorm(9),
  nrow = 3,
  dimnames = list(c("a", "b", "c"), c("d", "e", "f"))
)
print(mat)
## d e f
## a -1.2070657 -2.3456977 -0.5747400
## b 0.2774292 0.4291247 -0.5466319
## c 1.0844412 0.5060559 -0.5644520