• 1 Summary of previous sessions and Exercises
    • 1.1 Summary
  • 2 Solution to Exercises
    • 2.1 Replace letter "O" by "Ñ" (or any symbol different from a character already existing in LETTERS) and add "Ñ" between N and O
    • 2.2 Add "Ñ" between N and O
    • 2.3 New Exercise: Now let's repeat this after changing LETTERS into a factor
  • 3 Dataframes
    • 3.1 Creating dataframes
  • 4 Lists
    • 4.1 Creating lists

Data.frames and lists

Back to main page

1 Summary of previous sessions and Exercises

1.1 Summary

# Three vectors of equal length but different types: logical, numeric and
# character. TRUE/FALSE are spelled out because T and F are ordinary variables
# that can be reassigned, whereas TRUE and FALSE are reserved words.
v1 <- c(TRUE, TRUE, FALSE, FALSE, FALSE)
v2 <- c(1, 2, 3, 4, 5)
v3 <- LETTERS[1:5]

# Binding vectors of different types into a matrix coerces every element to
# character (note the quoted values below and the "chr" reported by str()).
# cbind() places each vector in its own column, named after the vector.
cbind(v1,v2,v3)
##      v1      v2  v3 
## [1,] "TRUE"  "1" "A"
## [2,] "TRUE"  "2" "B"
## [3,] "FALSE" "3" "C"
## [4,] "FALSE" "4" "D"
## [5,] "FALSE" "5" "E"
# rbind() places each vector in its own row instead.
rbind(v1,v2,v3)
##    [,1]   [,2]   [,3]    [,4]    [,5]   
## v1 "TRUE" "TRUE" "FALSE" "FALSE" "FALSE"
## v2 "1"    "2"    "3"     "4"     "5"    
## v3 "A"    "B"    "C"     "D"     "E"
# matrix() fills column by column from the concatenated vector: same layout as
# cbind(), but without column names.
matrix(c(v1,v2,v3),ncol=3)
##      [,1]    [,2] [,3]
## [1,] "TRUE"  "1"  "A" 
## [2,] "TRUE"  "2"  "B" 
## [3,] "FALSE" "3"  "C" 
## [4,] "FALSE" "4"  "D" 
## [5,] "FALSE" "5"  "E"
# str() confirms the element type (chr), the dimensions, and which dimension
# carries names in each case.
str(cbind(v1,v2,v3))
##  chr [1:5, 1:3] "TRUE" "TRUE" "FALSE" "FALSE" "FALSE" "1" "2" "3" "4" "5" ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:3] "v1" "v2" "v3"
str(rbind(v1,v2,v3))
##  chr [1:3, 1:5] "TRUE" "1" "A" "TRUE" "2" "B" "FALSE" "3" "C" "FALSE" "4" ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:3] "v1" "v2" "v3"
##   ..$ : NULL
str(matrix(c(v1,v2,v3),ncol=3))
##  chr [1:5, 1:3] "TRUE" "TRUE" "FALSE" "FALSE" "FALSE" "1" "2" "3" "4" "5" ...

Back to main page

2 Solution to Exercises

2.1 Replace letter "O" by "Ñ" (or any symbol different from a character already existing in LETTERS) and add "Ñ" between N and O

factorLETTERS <- as.factor(LETTERS)
# First attempt: "Ñ" is not one of the factor's levels, so the assignment only
# raises a warning and the element that held "O" becomes NA.
factorLETTERS[which(factorLETTERS=="O")]<-"Ñ"
## Warning in `[<-.factor`(`*tmp*`, which(factorLETTERS == "O"), value = "Ñ"):
## invalid factor level, NA generated
# The failed attempt already overwrote "O" with NA, so searching for "O" again
# would match nothing and the replacement would silently do nothing. Start
# again from a fresh factor, register the new level first, and only then
# replace "O".
factorLETTERS <- as.factor(LETTERS)
levels(factorLETTERS) <- c(levels(factorLETTERS), "Ñ")
factorLETTERS[which(factorLETTERS == "O")] <- "Ñ"

2.2 Add "Ñ" between N and O

# Insert "Ñ" between "N" and "O": make room by copying "O".."Z" one slot to
# the right (the factor grows by one element), then overwrite the original
# "O" slot with the new letter.
factorLETTERS <- as.factor(LETTERS)
levels(factorLETTERS) <- c(levels(factorLETTERS), "Ñ")

o_pos <- which(factorLETTERS == "O")
last_pos <- length(LETTERS)
factorLETTERS[(o_pos + 1):(last_pos + 1)] <- factorLETTERS[o_pos:last_pos]
# After the shift "O" occurs twice; the first occurrence is the slot to reuse.
factorLETTERS[which(factorLETTERS == "O")[1]] <- "Ñ"

2.3 New Exercise: Now let's repeat this after changing LETTERS into a factor

# Starting point for the exercise: LETTERS converted to a factor.
factorLETTERS <- as.factor(LETTERS)

Back to main page

3 Dataframes

# Open the help page for data.frame().
help("data.frame")

3.1 Creating dataframes

# Build three equal-length columns and combine them into a data frame.
# vec3 is drawn at random, so its values differ on every run.
vec1 <- c(1, 2, 3, 4, 5)
vec3 <- rnorm(5)
vec5 <- paste("number", 1:5, sep = "_")

# Columns are named after the vectors they come from.
df1 <- data.frame(vec1 = vec1, vec3 = vec3, vec5 = vec5)
df1
##   vec1         vec3     vec5
## 1    1  1.646474838 number_1
## 2    2  0.192996186 number_2
## 3    3 -0.392553554 number_3
## 4    4  0.007051323 number_4
## 5    5 -2.494835444 number_5
# Fill a 4 x 4 matrix row by row with 1..16, then view it as a data frame
# (the columns get the default names V1..V4). byrow is spelled TRUE rather
# than T because T is an ordinary, reassignable variable.
mat6 <- matrix(1:16, nrow = 4, byrow = TRUE)
as.data.frame(mat6)
##   V1 V2 V3 V4
## 1  1  2  3  4
## 2  5  6  7  8
## 3  9 10 11 12
## 4 13 14 15 16
# Adding columns
# Assigning to a new name with `$` appends a column; here a two-level factor
# marking the first three rows "A" and the last two "B".
df1$f1 <- factor(rep(c("A", "B"), times = c(3, 2)))
df1
##   vec1         vec3     vec5 f1
## 1    1  1.646474838 number_1  A
## 2    2  0.192996186 number_2  A
## 3    3 -0.392553554 number_3  A
## 4    4  0.007051323 number_4  B
## 5    5 -2.494835444 number_5  B
# Pieces of a dataframe
# [row, ] with an empty column index returns the whole row, still printed as a
# data frame with its column headers.
df1[1,]
##   vec1     vec3     vec5 f1
## 1    1 1.646475 number_1  A
# [, col] with an empty row index returns a single column as a plain vector.
df1[,3]
## [1] "number_1" "number_2" "number_3" "number_4" "number_5"
# `$name` extracts a column by name.
df1$vec3
## [1]  1.646474838  0.192996186 -0.392553554  0.007051323 -2.494835444
# A column can also be selected by name inside the brackets; a factor column
# keeps its levels.
df1[,"f1"]
## [1] A A A B B
## Levels: A B
# Same basic functions as in matrices
# The inspection helpers used on matrices also work on data frames.
class(df1)              
## [1] "data.frame"
# dim() reports rows then columns; nrow()/ncol() return each one separately.
dim(df1)
## [1] 5 4
ncol(df1)
## [1] 4
nrow(df1)
## [1] 5
# dimnames() returns the row names and the column names as a two-element list.
dimnames(df1)
## [[1]]
## [1] "1" "2" "3" "4" "5"
## 
## [[2]]
## [1] "vec1" "vec3" "vec5" "f1"
colnames(df1)
## [1] "vec1" "vec3" "vec5" "f1"
# str() shows that, unlike a matrix, each column keeps its own type.
str(df1)
## 'data.frame':    5 obs. of  4 variables:
##  $ vec1: num  1 2 3 4 5
##  $ vec3: num  1.64647 0.193 -0.39255 0.00705 -2.49484
##  $ vec5: chr  "number_1" "number_2" "number_3" "number_4" ...
##  $ f1  : Factor w/ 2 levels "A","B": 1 1 1 2 2
# Applying functions to parts of a dataframe
# apply() converts the data frame to a matrix first. Because df1 contains a
# character column and a factor, no column stays numeric, so mean() warns once
# per column and returns NA for all four.
apply(df1, 2, mean)
## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(newX[, i], ...): argument is not numeric or logical:
## returning NA
## vec1 vec3 vec5   f1 
##   NA   NA   NA   NA
# Restricting the call to the two numeric columns gives the column means.
apply(df1[,1:2], 2, mean)
##       vec1       vec3 
##  3.0000000 -0.2081733
# tapply() computes the mean of vec3 separately for each level of f1.
tapply(df1$vec3, df1$f1, mean)
##          A          B 
##  0.4823058 -1.2438921

Back to main page

4 Lists

# Open the help page for list().
help("list")

4.1 Creating lists

# A 10 x 2 x 5 integer array holding 1..100; values fill the first index
# fastest, then the second, then the third.
arr2 <- array(seq_len(100), dim = c(10L, 2L, 5L))
str(arr2)
##  int [1:10, 1:2, 1:5] 1 2 3 4 5 6 7 8 9 10 ...
# An unnamed list: its elements may differ in type and size, and are printed
# under positional labels [[1]], [[2]], ...
ls1 <- list(
  vec1,
  vec3,
  mat6,
  arr2,
  df1
)
ls1
## [[1]]
## [1] 1 2 3 4 5
## 
## [[2]]
## [1]  1.646474838  0.192996186 -0.392553554  0.007051323 -2.494835444
## 
## [[3]]
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
## [2,]    5    6    7    8
## [3,]    9   10   11   12
## [4,]   13   14   15   16
## 
## [[4]]
## , , 1
## 
##       [,1] [,2]
##  [1,]    1   11
##  [2,]    2   12
##  [3,]    3   13
##  [4,]    4   14
##  [5,]    5   15
##  [6,]    6   16
##  [7,]    7   17
##  [8,]    8   18
##  [9,]    9   19
## [10,]   10   20
## 
## , , 2
## 
##       [,1] [,2]
##  [1,]   21   31
##  [2,]   22   32
##  [3,]   23   33
##  [4,]   24   34
##  [5,]   25   35
##  [6,]   26   36
##  [7,]   27   37
##  [8,]   28   38
##  [9,]   29   39
## [10,]   30   40
## 
## , , 3
## 
##       [,1] [,2]
##  [1,]   41   51
##  [2,]   42   52
##  [3,]   43   53
##  [4,]   44   54
##  [5,]   45   55
##  [6,]   46   56
##  [7,]   47   57
##  [8,]   48   58
##  [9,]   49   59
## [10,]   50   60
## 
## , , 4
## 
##       [,1] [,2]
##  [1,]   61   71
##  [2,]   62   72
##  [3,]   63   73
##  [4,]   64   74
##  [5,]   65   75
##  [6,]   66   76
##  [7,]   67   77
##  [8,]   68   78
##  [9,]   69   79
## [10,]   70   80
## 
## , , 5
## 
##       [,1] [,2]
##  [1,]   81   91
##  [2,]   82   92
##  [3,]   83   93
##  [4,]   84   94
##  [5,]   85   95
##  [6,]   86   96
##  [7,]   87   97
##  [8,]   88   98
##  [9,]   89   99
## [10,]   90  100
## 
## 
## [[5]]
##   vec1         vec3     vec5 f1
## 1    1  1.646474838 number_1  A
## 2    2  0.192996186 number_2  A
## 3    3 -0.392553554 number_3  A
## 4    4  0.007051323 number_4  B
## 5    5 -2.494835444 number_5  B
# The same list with named elements: each element is now printed under $name
# and can be retrieved by that name.
ls1 <- list(
  vec1 = vec1,
  vec3 = vec3,
  my.matrix = mat6,
  the.array = arr2,
  DF = df1
)
ls1
## $vec1
## [1] 1 2 3 4 5
## 
## $vec3
## [1]  1.646474838  0.192996186 -0.392553554  0.007051323 -2.494835444
## 
## $my.matrix
##      [,1] [,2] [,3] [,4]
## [1,]    1    2    3    4
## [2,]    5    6    7    8
## [3,]    9   10   11   12
## [4,]   13   14   15   16
## 
## $the.array
## , , 1
## 
##       [,1] [,2]
##  [1,]    1   11
##  [2,]    2   12
##  [3,]    3   13
##  [4,]    4   14
##  [5,]    5   15
##  [6,]    6   16
##  [7,]    7   17
##  [8,]    8   18
##  [9,]    9   19
## [10,]   10   20
## 
## , , 2
## 
##       [,1] [,2]
##  [1,]   21   31
##  [2,]   22   32
##  [3,]   23   33
##  [4,]   24   34
##  [5,]   25   35
##  [6,]   26   36
##  [7,]   27   37
##  [8,]   28   38
##  [9,]   29   39
## [10,]   30   40
## 
## , , 3
## 
##       [,1] [,2]
##  [1,]   41   51
##  [2,]   42   52
##  [3,]   43   53
##  [4,]   44   54
##  [5,]   45   55
##  [6,]   46   56
##  [7,]   47   57
##  [8,]   48   58
##  [9,]   49   59
## [10,]   50   60
## 
## , , 4
## 
##       [,1] [,2]
##  [1,]   61   71
##  [2,]   62   72
##  [3,]   63   73
##  [4,]   64   74
##  [5,]   65   75
##  [6,]   66   76
##  [7,]   67   77
##  [8,]   68   78
##  [9,]   69   79
## [10,]   70   80
## 
## , , 5
## 
##       [,1] [,2]
##  [1,]   81   91
##  [2,]   82   92
##  [3,]   83   93
##  [4,]   84   94
##  [5,]   85   95
##  [6,]   86   96
##  [7,]   87   97
##  [8,]   88   98
##  [9,]   89   99
## [10,]   90  100
## 
## 
## $DF
##   vec1         vec3     vec5 f1
## 1    1  1.646474838 number_1  A
## 2    2  0.192996186 number_2  A
## 3    3 -0.392553554 number_3  A
## 4    4  0.007051323 number_4  B
## 5    5 -2.494835444 number_5  B
# Reviewing lists
# A list has no dimensions, only a length (its number of elements).
dim(ls1)
## NULL
length(ls1)
## [1] 5
# str() gives a compact overview of every element, including the columns of
# the nested data frame.
str(ls1)
## List of 5
##  $ vec1     : num [1:5] 1 2 3 4 5
##  $ vec3     : num [1:5] 1.64647 0.193 -0.39255 0.00705 -2.49484
##  $ my.matrix: int [1:4, 1:4] 1 5 9 13 2 6 10 14 3 7 ...
##  $ the.array: int [1:10, 1:2, 1:5] 1 2 3 4 5 6 7 8 9 10 ...
##  $ DF       :'data.frame':   5 obs. of  4 variables:
##   ..$ vec1: num [1:5] 1 2 3 4 5
##   ..$ vec3: num [1:5] 1.64647 0.193 -0.39255 0.00705 -2.49484
##   ..$ vec5: chr [1:5] "number_1" "number_2" "number_3" "number_4" ...
##   ..$ f1  : Factor w/ 2 levels "A","B": 1 1 1 2 2
# Adding pieces
# New elements are appended by assigning to a name that does not exist yet.
# Arithmetic is not defined for factors: the product below only raises a
# warning and yields NA for every element, which is what gets stored in f10.
ls1$f10 <- df1$f1*10
## Warning in Ops.factor(df1$f1, 10): '*' not meaningful for factors
# The same operation on a numeric vector works as expected.
ls1$vec1.multi10 <- vec1*10
str(ls1)
## List of 7
##  $ vec1        : num [1:5] 1 2 3 4 5
##  $ vec3        : num [1:5] 1.64647 0.193 -0.39255 0.00705 -2.49484
##  $ my.matrix   : int [1:4, 1:4] 1 5 9 13 2 6 10 14 3 7 ...
##  $ the.array   : int [1:10, 1:2, 1:5] 1 2 3 4 5 6 7 8 9 10 ...
##  $ DF          :'data.frame':    5 obs. of  4 variables:
##   ..$ vec1: num [1:5] 1 2 3 4 5
##   ..$ vec3: num [1:5] 1.64647 0.193 -0.39255 0.00705 -2.49484
##   ..$ vec5: chr [1:5] "number_1" "number_2" "number_3" "number_4" ...
##   ..$ f1  : Factor w/ 2 levels "A","B": 1 1 1 2 2
##  $ f10         : logi [1:5] NA NA NA NA NA
##  $ vec1.multi10: num [1:5] 10 20 30 40 50
# Pieces of a list
# `$name` and [["name"]] both return the element itself (here the data frame).
ls1$DF
##   vec1         vec3     vec5 f1
## 1    1  1.646474838 number_1  A
## 2    2  0.192996186 number_2  A
## 3    3 -0.392553554 number_3  A
## 4    4  0.007051323 number_4  B
## 5    5 -2.494835444 number_5  B
ls1[["DF"]]
##   vec1         vec3     vec5 f1
## 1    1  1.646474838 number_1  A
## 2    2  0.192996186 number_2  A
## 3    3 -0.392553554 number_3  A
## 4    4  0.007051323 number_4  B
## 5    5 -2.494835444 number_5  B
# ls1 has only 7 elements, so asking for the 10th one is an error.
ls1[[10]]
## Error in ls1[[10]]: subscript out of bounds
# Practical use of lists
# dimnames takes a list with one character vector per dimension, so an array
# can carry labels for its rows, columns and third-dimension slices.
# The row labels read "row 1".."row 10" (previously misspelled "raw").
# NOTE(review): ls1$the.array was stored before this reassignment, so it keeps
# the unlabelled copy - confirm whether ls1 should be rebuilt here.
arr2 <- array(1:100, dim = c(10, 2, 5),
              dimnames = list(paste("row", 1:10),
                              paste("col", 1:2),
                              paste("obj", 1:5)))

# Applying functions to parts of a list
# lapply() calls summary() on each element of ls1 in turn and returns the
# results as a list that keeps the element names.
lapply(ls1, summary)
## $vec1
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1       2       3       3       4       5 
## 
## $vec3
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -2.494835 -0.392554  0.007051 -0.208173  0.192996  1.646475 
## 
## $my.matrix
##        V1           V2           V3           V4    
##  Min.   : 1   Min.   : 2   Min.   : 3   Min.   : 4  
##  1st Qu.: 4   1st Qu.: 5   1st Qu.: 6   1st Qu.: 7  
##  Median : 7   Median : 8   Median : 9   Median :10  
##  Mean   : 7   Mean   : 8   Mean   : 9   Mean   :10  
##  3rd Qu.:10   3rd Qu.:11   3rd Qu.:12   3rd Qu.:13  
##  Max.   :13   Max.   :14   Max.   :15   Max.   :16  
## 
## $the.array
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   25.75   50.50   50.50   75.25  100.00 
## 
## $DF
##       vec1        vec3               vec5           f1   
##  Min.   :1   Min.   :-2.494835   Length:5           A:3  
##  1st Qu.:2   1st Qu.:-0.392554   Class :character   B:2  
##  Median :3   Median : 0.007051   Mode  :character        
##  Mean   :3   Mean   :-0.208173                           
##  3rd Qu.:4   3rd Qu.: 0.192996                           
##  Max.   :5   Max.   : 1.646475                           
## 
## $f10
##    Mode    NA's 
## logical       5 
## 
## $vec1.multi10
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      10      20      30      30      40      50

Back to main page