read in a csv file from a github url

Read in data from a URL using the readr package. Make sure the link points to the raw file on GitHub (a raw.githubusercontent.com address), not to the rendered GitHub page.

# Download the CSV straight from the raw GitHub address; the first row of the
# file is used as the column names.
url <- read_csv(
  "https://raw.githubusercontent.com/reilly-lab/reilly-lab.github.io/master/BoyGirl.csv",
  col_names = TRUE
)
print(url)
## # A tibble: 3 x 2
##     boy  girl
##   <int> <int>
## 1     1     4
## 2     2     5
## 3     3     6

 

generate a vector of random numbers

N=50 items sampled from a random normal distribution with a mean of 50, sd=5

# Fix the RNG state so the draws below are reproducible.
set.seed(123)
# 50 draws from a normal distribution with mean 50 and standard deviation 5.
vec <- rnorm(n = 50, mean = 50, sd = 5)
# Show the first five draws.
head(vec, n = 5)
## [1] 47.19762 48.84911 57.79354 50.35254 50.64644

 

generate a dataframe ‘dat’ populated with random normal dist.

populate it with numbers from a random normal distribution (mean=0, sd=1), change it to a tibble, view the first six rows (the default for head)

# Three columns (a, b, c) of 10 standard-normal draws each, built as a
# data frame and then converted to a tibble.
dat <- as_tibble(
  data.frame(
    a = rnorm(10),
    b = rnorm(10),
    c = rnorm(10)
  )
)
head(dat)
## # A tibble: 6 x 3
##         a      b      c
##     <dbl>  <dbl>  <dbl>
## 1  0.253   0.380 -0.491
## 2 -0.0285 -0.502 -2.31 
## 3 -0.0429 -0.333  1.01 
## 4  1.37   -1.02  -0.709
## 5 -0.226  -1.07  -0.688
## 6  1.52    0.304  1.03

 

another way to create a dataframe, sampling a specific range

Here’s another dataframe, created with the replicate and sample functions. The signature of sample is: sample(x, size, replace = FALSE)

# Fix the RNG state so the sampled values are reproducible.
set.seed(190)
# Two independent columns, each 100 integers drawn with replacement from 0..100.
dat.other <- data.frame(
  replicate(n = 2, expr = sample(0:100, size = 100, replace = TRUE))
)
head(dat.other, n = 5)
##   X1  X2
## 1 69  18
## 2 71  88
## 3  6  92
## 4 93 100
## 5 91  99

 

append a grouping variable to dat

generate a vector of letters that will serve as a grouping variable and bind it to dat

# Grouping labels: five "a" entries followed by five "b" entries.
letvec <- rep(c("a", "b"), each = 5)
# Column-bind the labels onto dat; the added column is named letvec.
mat.b <- cbind(dat, letvec)
head(mat.b)
##             a          b          c letvec
## 1  0.25331851  0.3796395 -0.4910312      a
## 2 -0.02854676 -0.5023235 -2.3091689      a
## 3 -0.04287046 -0.3332074  1.0057385      a
## 4  1.36860228 -1.0185754 -0.7092008      a
## 5 -0.22577099 -1.0717912 -0.6880086      a
## 6  1.51647060  0.3035286  1.0255714      b
# Inspect the column types of the combined data frame.
str(mat.b)
## 'data.frame':    10 obs. of  4 variables:
##  $ a     : num  0.2533 -0.0285 -0.0429 1.3686 -0.2258 ...
##  $ b     : num  0.38 -0.502 -0.333 -1.019 -1.072 ...
##  $ c     : num  -0.491 -2.309 1.006 -0.709 -0.688 ...
##  $ letvec: Factor w/ 2 levels "a","b": 1 1 1 1 1 2 2 2 2 2

 

change a single column name in ‘dat’

in ‘dat’, rename the column “a” to “SomethingElse” and store the result in ‘new.dat’

# rename() takes new_name = old_name; dat itself is left unchanged.
new.dat <- dat %>%
  rename(SomethingElse = a)
head(new.dat, n = 5)
## # A tibble: 5 x 3
##   SomethingElse      b      c
##           <dbl>  <dbl>  <dbl>
## 1        0.253   0.380 -0.491
## 2       -0.0285 -0.502 -2.31 
## 3       -0.0429 -0.333  1.01 
## 4        1.37   -1.02  -0.709
## 5       -0.226  -1.07  -0.688

 

melt the renamed dataframe ‘new.dat’ from wide to long form, write to ‘melt.dat’

using the dplyr pipe %>%; the variable.name and value.name arguments of melt set the new column headers

# Stack all three columns into long form: the former column name goes into
# "group" and the corresponding value into "iq".
melt.dat <- new.dat %>%
  melt(
    measure.vars = 1:3,
    variable.name = "group",
    value.name = "iq"
  )
head(melt.dat)
##           group          iq
## 1 SomethingElse  0.25331851
## 2 SomethingElse -0.02854676
## 3 SomethingElse -0.04287046
## 4 SomethingElse  1.36860228
## 5 SomethingElse -0.22577099
## 6 SomethingElse  1.51647060

 

filter out a single level of a grouping variable

generate a new dataframe isolating only groups “b” and “c” using the filter command in dplyr. Drop the ‘SomethingElse’ observations, then re-create the grouping factor so the unused level is removed.

# Keep every row except the "SomethingElse" group, then rebuild the factor so
# the now-unused level is dropped from its level set.
some.dat <- melt.dat %>%
  filter(group != "SomethingElse") %>%
  mutate(group = factor(group))
levels(some.dat$group)
## [1] "b" "c"

 

generate a matrix

populate a 3x3 matrix (nrow=3) with random numbers; note that matrix() fills column by column by default. Set a seed for reproducibility, then name the rows and columns.

# Fix the RNG state so the matrix entries are reproducible.
set.seed(1234)
# Nine standard-normal draws arranged into 3 rows (filled column by column),
# with rows labelled a-c and columns labelled d-f in the same call.
mat <- matrix(
  rnorm(9),
  nrow = 3,
  dimnames = list(c("a", "b", "c"), c("d", "e", "f"))
)
print(mat)
##            d          e          f
## a -1.2070657 -2.3456977 -0.5747400
## b  0.2774292  0.4291247 -0.5466319
## c  1.0844412  0.5060559 -0.5644520