A quick glimpse into data.table for data manipulation.
Data table syntax is of the form DT[i, j, by]
i
: on which rowj
: what to doby
: group by whatirisDT = as.data.table(iris)
#tables()# show loaded tables
Sorting / ordering rows
setorder(data.table, …)
-
: to sort a variable in descending order
setorder(irisDT, Sepal.Length, Sepal.Width)
irisDT[1:5]
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1: 4.3 3.0 1.1 0.1 setosa
2: 4.4 2.9 1.4 0.2 setosa
3: 4.4 3.0 1.3 0.2 setosa
4: 4.4 3.2 1.3 0.2 setosa
5: 4.5 2.3 1.3 0.3 setosa
setorder(irisDT, -Species, Sepal.Width)
irisDT[1:5]
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1: 6.0 2.2 5.0 1.5 virginica
2: 4.9 2.5 4.5 1.7 virginica
3: 5.7 2.5 5.0 2.0 virginica
4: 6.3 2.5 5.0 1.9 virginica
5: 6.7 2.5 5.8 1.8 virginica
DT[i, j, by]
i
: on which rows?irisDT[3:4]
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1: 5.7 2.5 5 2.0 virginica
2: 6.3 2.5 5 1.9 virginica
# irisDT[3:4,] #row 3 and 4
# irisDT[-(1:5)] #delete rows 1:5
# irisDT[!(1:5)] # using false to delete as well
# irisDT[.N] # last row
# irisDT[1:(.N-10)] # all but the last 10 rows
# irisDT[Species == "virginica"] # based on conditions fulfilled in columns
irisDT[Species %like% "^v"][1:5]# using %like% helper function
irisDT[Petal.Width %between% c(0.3, 0.4)][1:5]# values in an interval
DT[i, j, by]
j
: what to do? -> select columnsirisDT[, Species][1:5] # returns a vector
irisDT[, "Species"][1:5] # returns a dataframe/data.table
irisDT[, -c("Species")]
irisDT[, !c("Species")]
irisDT[, list(Species, LS = Sepal.Length)] # select and rename
# .() is an alias for list
irisDT[, .(Species, SL = Sepal.Length)]
irisDT[Species == "virginica" & Sepal.Length > 7 & Sepal.Width < 3, !c("Species")]
irisDT[Species == "virginica" &
Sepal.Length > 7 &Sepal.Width < 3,.(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width)]
DT[i, j, by]
j
: what to do? -> compute new columns preserving existing onesdata.table uses a new operator :=
to add/update/delete columns (by reference)
irisDT[, maxLength := max(Sepal.Length, Petal.Length)][,
minWidth := min(Sepal.Length, Petal.Width)]
irisDT[1:5]
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1: 6.0 2.2 5.0 1.5 virginica
2: 4.9 2.5 4.5 1.7 virginica
3: 5.7 2.5 5.0 2.0 virginica
4: 6.3 2.5 5.0 1.9 virginica
5: 6.7 2.5 5.8 1.8 virginica
maxLength minWidth
1: 7.9 0.1
2: 7.9 0.1
3: 7.9 0.1
4: 7.9 0.1
5: 7.9 0.1
LHS := RHS form
irisDT[, c("maxLength", "minWidth") :=
list(
max(Sepal.Length, Petal.Length),
min(Sepal.Width, Petal.Width)
)]
irisDT[1:5]
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1: 6.0 2.2 5.0 1.5 virginica
2: 4.9 2.5 4.5 1.7 virginica
3: 5.7 2.5 5.0 2.0 virginica
4: 6.3 2.5 5.0 1.9 virginica
5: 6.7 2.5 5.8 1.8 virginica
maxLength minWidth
1: 7.9 0.1
2: 7.9 0.1
3: 7.9 0.1
4: 7.9 0.1
5: 7.9 0.1
Functional form
irisDT[, `:=` (
maxLength = max(Sepal.Length, Petal.Length),
minWidth = min(Sepal.Width, Petal.Width)
)]
irisDT[1:5]
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1: 6.0 2.2 5.0 1.5 virginica
2: 4.9 2.5 4.5 1.7 virginica
3: 5.7 2.5 5.0 2.0 virginica
4: 6.3 2.5 5.0 1.9 virginica
5: 6.7 2.5 5.8 1.8 virginica
maxLength minWidth
1: 7.9 0.1
2: 7.9 0.1
3: 7.9 0.1
4: 7.9 0.1
5: 7.9 0.1
DT[i, j, by]
j
: what to do? -> compute new columns dropping existing onesirisDT[, .(maxLength = pmax(Sepal.Length, Petal.Length),
minWidth = pmin(Sepal.Width, Petal.Width))][1:5]
maxLength minWidth
1: 6.0 1.5
2: 4.9 1.7
3: 5.7 2.0
4: 6.3 1.9
5: 6.7 1.8
newIris <- irisDT[, .(Sepal.Area = Sepal.Width * Sepal.Length,
Petal.Area = Petal.Width * Petal.Length)]
newIris[, Area.Ratio := Petal.Area / Sepal.Area]
newIris[1:5]
Sepal.Area Petal.Area Area.Ratio
1: 13.20 7.50 0.5681818
2: 12.25 7.65 0.6244898
3: 14.25 10.00 0.7017544
4: 15.75 9.50 0.6031746
5: 16.75 10.44 0.6232836
irisDT[, .(meanSepalLength = mean(Sepal.Length),
meanSepalWidth = mean(Sepal.Width),
meanPetalLength = mean(Petal.Length),
meanPetalWidth = mean(Petal.Width))]
meanSepalLength meanSepalWidth meanPetalLength meanPetalWidth
1: 5.843333 3.057333 3.758 1.199333
irisDT[, .(nSamples = .N, nSpecies = uniqueN(Species))
]
nSamples nSpecies
1: 150 3
irisDT[Species == "versicolor",.(
meanSepalLength = mean(Sepal.Length),
meanSepalWidth = mean(Sepal.Width),
meanPetalLength = mean(Petal.Length),
meanPetalWidth = mean(Petal.Width)
)]
meanSepalLength meanSepalWidth meanPetalLength meanPetalWidth
1: 5.936 2.77 4.26 1.326
irisDT[, .(meanSepalLength = mean(Sepal.Length),
meanSepalWidth = mean(Sepal.Width),
meanPetalLength = mean(Petal.Length),
meanPetalWidth = mean(Petal.Width)), by = "Species"]
Species meanSepalLength meanSepalWidth meanPetalLength
1: virginica 6.588 2.974 5.552
2: versicolor 5.936 2.770 4.260
3: setosa 5.006 3.428 1.462
meanPetalWidth
1: 2.026
2: 1.326
3: 0.246
irisDT[, .(nSamples = .N, nSpecies = uniqueN(Species)), by = .(Species)]
Species nSamples nSpecies
1: virginica 50 1
2: versicolor 50 1
3: setosa 50 1
Inner join: return all rows from x where there are matching values in y and all columns from x and y. In case of multiple matches between x and y, all combinations of the matches are returned.
Full join: return all rows and columns from both x and y.
Left join: return all rows from x and all columns from x and y. In case of multiple matches between x and y, all combinations of the matches are returned.
Right join: return all rows from y and all columns from x and y. In case of multiple matches between x and y, all combinations of the matches are returned.
DT[i, on]
i
: join to which data.table?on
: join key columns? id x
1: 1 9
2: 2 12
3: 4 14
4: 5 21
5: 6 8
id y
1: 1 8
2: 3 14
3: 4 19
4: 6 2
5: 6 4
y[x, on = .(id), nomatch = 0]
id y x
1: 1 8 9
2: 4 19 14
3: 6 2 8
4: 6 4 8
merge.data.table(x = x, y = y, by = "id", all = TRUE)
id x y
1: 1 9 8
2: 2 12 NA
3: 3 NA 14
4: 4 14 19
5: 5 21 NA
6: 6 8 2
7: 6 8 4
merge.data.table(x = x, y = y, by = "id", all.x = TRUE)
id x y
1: 1 9 8
2: 2 12 NA
3: 4 14 19
4: 5 21 NA
5: 6 8 2
6: 6 8 4
y[x, on = .(id)]
id y x
1: 1 8 9
2: 2 NA 12
3: 4 19 14
4: 5 NA 21
5: 6 2 8
6: 6 4 8
data.table::merge.data.table(x = x, y = y, by = "id", all.y = T)
id x y
1: 1 9 8
2: 3 NA 14
3: 4 14 19
4: 6 8 2
5: 6 8 4
x[y, on = .(id)]
id x y
1: 1 9 8
2: 3 NA 14
3: 4 14 19
4: 6 8 2
5: 6 8 4
x[!y, on = .(id)]
id x
1: 2 12
2: 5 21
No need of the on argument when performing a join
setkey(x, id)
setkey(y, "id")
haskey(x); haskey(y)
[1] TRUE
[1] TRUE
key(x); key(y)
[1] "id"
[1] "id"
Inner join x and y
x[y, nomatch = 0]
id x y
1: 1 9 8
2: 4 14 19
3: 6 8 2
4: 6 8 4
Right join x and y
x[y]
id x y
1: 1 9 8
2: 3 NA 14
3: 4 14 19
4: 6 8 2
5: 6 8 4
Left join x and y
y[x]
id y x
1: 1 8 9
2: 2 NA 12
3: 4 19 14
4: 5 NA 21
5: 6 2 8
6: 6 4 8
library(dplyr)
library(data.table)
data(starwars)
starwarsDT <- as.data.table(starwars)
starwarsDT[1]
name height mass hair_color skin_color eye_color
1: Luke Skywalker 172 77 blond fair blue
birth_year sex gender homeworld species
1: 19 male masculine Tatooine Human
films
1: The Empire Strikes Back,Revenge of the Sith,Return of the Jedi,A New Hope,The Force Awakens
vehicles starships
1: Snowspeeder,Imperial Speeder Bike X-wing,Imperial shuttle
starwarsDT[, .(min = min(mass, na.rm = T),
max = max(mass, na.rm = T),
nobs = .N), by = homeworld][nobs >1]
homeworld min max nobs
1: Tatooine 32.0 136.0 10
2: Naboo 32.0 85.0 11
3: Alderaan 49.0 79.0 3
4: Kashyyyk 112.0 136.0 2
5: Corellia 77.0 80.0 2
6: <NA> 17.0 140.0 10
7: Kamino 78.2 88.0 3
8: Coruscant 50.0 50.0 3
9: Ryloth 55.0 55.0 2
10: Mirial 50.0 56.2 2
Tidy data
Two fundamental verbs
melt()
: ‘Wide’ -> ‘long’ datadcast()
: ‘long’ -> ‘wide’ dataWide format
Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
---|---|---|---|---|
5.1 | 3.5 | 1.4 | 0.2 | setosa |
4.9 | 3.0 | 1.4 | 0.2 | setosa |
4.7 | 3.2 | 1.3 | 0.2 | setosa |
melt(wide_df, id.vars = 'grouping variable', variable.name = 'measures_across_grouping_var',value.name = 'value_name_as_desired')
Long to wide
dcast(long_df, group ~ atttribute, value.var = "value")
irisUnited <- irisDT[ , Sepal.Length.Width := paste0(Sepal.Length, "/", Sepal.Width)]
irisUnited[1:5]
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1: 6.0 2.2 5.0 1.5 virginica
2: 4.9 2.5 4.5 1.7 virginica
3: 5.7 2.5 5.0 2.0 virginica
4: 6.3 2.5 5.0 1.9 virginica
5: 6.7 2.5 5.8 1.8 virginica
maxLength minWidth Sepal.Length.Width
1: 7.9 0.1 6/2.2
2: 7.9 0.1 4.9/2.5
3: 7.9 0.1 5.7/2.5
4: 7.9 0.1 6.3/2.5
5: 7.9 0.1 6.7/2.5
irisSeparated = irisUnited[, c('Sepal.Length', 'Sepal.Width') := tstrsplit(Sepal.Length.Width, "/")]
irisSeparated[1:5]
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1: 6 2.2 5.0 1.5 virginica
2: 4.9 2.5 4.5 1.7 virginica
3: 5.7 2.5 5.0 2.0 virginica
4: 6.3 2.5 5.0 1.9 virginica
5: 6.7 2.5 5.8 1.8 virginica
maxLength minWidth Sepal.Length.Width
1: 7.9 0.1 6/2.2
2: 7.9 0.1 4.9/2.5
3: 7.9 0.1 5.7/2.5
4: 7.9 0.1 6.3/2.5
5: 7.9 0.1 6.7/2.5
Exercise 1
data("relig_income")
a = as.data.table(relig_income)
a[1:5]
religion <$10k $10-20k $20-30k $30-40k $40-50k $50-75k
1: Agnostic 27 34 60 81 76 137
2: Atheist 12 27 37 52 35 70
3: Buddhist 27 21 30 34 33 58
4: Catholic 418 617 732 670 638 1116
5: Don’t know/refused 15 14 15 11 10 35
$75-100k $100-150k >150k Don't know/refused
1: 122 109 84 96
2: 73 59 74 76
3: 62 39 53 54
4: 949 792 633 1489
5: 21 17 18 116
b = melt(a, id.vars = "religion", variable.name = "income_category", value.name = "n")
Exercise 2
data("storms")
aa = as.data.table(storms)
aa[, date:= paste0(year, "-", month,"-", day)]
.SD
: Subset of Data for each group
.SDcols
: Columns of the data.table that are included in .SDcols = sapply(irisDT, is.numeric)
cols = names(cols)[cols]
irisDT[, lapply(.SD, mean), by = Species, .SDcols = cols]
Species Petal.Length Petal.Width maxLength minWidth
1: virginica 5.552 2.026 7.9 0.1
2: versicolor 4.260 1.326 7.9 0.1
3: setosa 1.462 0.246 7.9 0.1
Species Petal.Length Petal.Width
1: virginica 5.552 2.026
2: versicolor 4.260 1.326
3: setosa 1.462 0.246
irisDT[ , unlist(recursive = FALSE, lapply(.(mean = mean, sd = sd, min = min, max = max),
function(f) lapply(.SD, f))), by = Species][1:5]
Species mean.Sepal.Length mean.Sepal.Width mean.Petal.Length
1: virginica NA NA 5.552
2: versicolor NA NA 4.260
3: setosa NA NA 1.462
4: <NA> NA NA NA
5: <NA> NA NA NA
mean.Petal.Width mean.maxLength mean.minWidth
1: 2.026 7.9 0.1
2: 1.326 7.9 0.1
3: 0.246 7.9 0.1
4: NA NA NA
5: NA NA NA
mean.Sepal.Length.Width sd.Sepal.Length sd.Sepal.Width
1: NA 0.6358796 0.3224966
2: NA 0.5161711 0.3137983
3: NA 0.3524897 0.3790644
4: NA NA NA
5: NA NA NA
sd.Petal.Length sd.Petal.Width sd.maxLength sd.minWidth
1: 0.5518947 0.2746501 0 0
2: 0.4699110 0.1977527 0 0
3: 0.1736640 0.1053856 0 0
4: NA NA NA NA
5: NA NA NA NA
sd.Sepal.Length.Width min.Sepal.Length min.Sepal.Width
1: NA 4.9 2.2
2: NA 4.9 2
3: NA 4.3 2.3
4: NA <NA> <NA>
5: NA <NA> <NA>
min.Petal.Length min.Petal.Width min.maxLength min.minWidth
1: 4.5 1.4 7.9 0.1
2: 3.0 1.0 7.9 0.1
3: 1.0 0.1 7.9 0.1
4: NA NA NA NA
5: NA NA NA NA
min.Sepal.Length.Width max.Sepal.Length max.Sepal.Width
1: 4.9/2.5 7.9 3.8
2: 4.9/2.4 7 3.4
3: 4.3/3 5.8 4.4
4: <NA> <NA> <NA>
5: <NA> <NA> <NA>
max.Petal.Length max.Petal.Width max.maxLength max.minWidth
1: 6.9 2.5 7.9 0.1
2: 5.1 1.8 7.9 0.1
3: 1.9 0.6 7.9 0.1
4: NA NA NA NA
5: NA NA NA NA
max.Sepal.Length.Width
1: 7.9/3.8
2: 7/3.2
3: 5/3.6
4: <NA>
5: <NA>
Exercise 1 - Compute the minimum, maximum, median and mean for all numeric columns
data("starwars")
starwarsDT = as.data.table(starwars)
cols = sapply(starwarsDT, is.numeric)
cols = names(cols)[cols]
starwarsDT[, height:= as.numeric(height)]
starwarsDT[, unlist(recursive = F,
lapply(.(min = min, max = max, median = median, mean = mean), function(f) lapply(.SD, f))), by = .(homeworld, species), .SDcols = cols][1:5]
homeworld species min.height min.mass min.birth_year max.height
1: Tatooine Human 163 NA 19 202
2: Tatooine Droid 97 32 NA 167
3: Naboo Droid 96 32 33 96
4: Alderaan Human 150 NA NA 191
5: Stewjon Human 182 77 57 182
max.mass max.birth_year median.height median.mass
1: NA 82 180.5 NA
2: 75 NA 132.0 53.5
3: 32 33 96.0 32.0
4: NA NA 188.0 NA
5: 77 57 182.0 77.0
median.birth_year mean.height mean.mass mean.birth_year
1: 44.45 179.2500 NA 47.475
2: NA 132.0000 53.5 NA
3: 33.00 96.0000 32.0 33.000
4: NA 176.3333 NA NA
5: 57.00 182.0000 77.0 57.000
Exercise 2 - all colnames to upper case
starwarsDT = as.data.table(starwars)
setnames(starwarsDT, toupper(names(starwarsDT)))
Convert all character columns to upper case
cols = sapply(starwarsDT, is.character)
cols = names(cols)[cols]
starwarsDT[, lapply(.SD, toupper)][1:5]
NAME HEIGHT MASS HAIR_COLOR SKIN_COLOR EYE_COLOR
1: LUKE SKYWALKER 172 77 BLOND FAIR BLUE
2: C-3PO 167 75 <NA> GOLD YELLOW
3: R2-D2 96 32 <NA> WHITE, BLUE RED
4: DARTH VADER 202 136 NONE WHITE YELLOW
5: LEIA ORGANA 150 49 BROWN LIGHT BROWN
BIRTH_YEAR SEX GENDER HOMEWORLD SPECIES
1: 19 MALE MASCULINE TATOOINE HUMAN
2: 112 NONE MASCULINE TATOOINE DROID
3: 33 NONE MASCULINE NABOO DROID
4: 41.9 MALE MASCULINE TATOOINE HUMAN
5: 19 FEMALE FEMININE ALDERAAN HUMAN
FILMS
1: C("THE EMPIRE STRIKES BACK", "REVENGE OF THE SITH", "RETURN OF THE JEDI", "A NEW HOPE", "THE FORCE AWAKENS")
2: C("THE EMPIRE STRIKES BACK", "ATTACK OF THE CLONES", "THE PHANTOM MENACE", "REVENGE OF THE SITH", "RETURN OF THE JEDI", "A NEW HOPE")
3: C("THE EMPIRE STRIKES BACK", "ATTACK OF THE CLONES", "THE PHANTOM MENACE", "REVENGE OF THE SITH", "RETURN OF THE JEDI", "A NEW HOPE", "THE FORCE AWAKENS")
4: C("THE EMPIRE STRIKES BACK", "REVENGE OF THE SITH", "RETURN OF THE JEDI", "A NEW HOPE")
5: C("THE EMPIRE STRIKES BACK", "REVENGE OF THE SITH", "RETURN OF THE JEDI", "A NEW HOPE", "THE FORCE AWAKENS")
VEHICLES
1: C("SNOWSPEEDER", "IMPERIAL SPEEDER BIKE")
2: CHARACTER(0)
3: CHARACTER(0)
4: CHARACTER(0)
5: IMPERIAL SPEEDER BIKE
STARSHIPS
1: C("X-WING", "IMPERIAL SHUTTLE")
2: CHARACTER(0)
3: CHARACTER(0)
4: TIE ADVANCED X1
5: CHARACTER(0)
Exercise 3
ts_diameter
and hu_diameter
stormsDT <- as.data.table(storms)
numericCols = sapply(stormsDT, is.numeric)
numericCols = names(numericCols)[numericCols]
stormsDTNew <- na.omit(stormsDT, cols = numericCols)
characterCols <- sapply(stormsDTNew, is.character)
characterCols <- names(characterCols)[characterCols]
colsOfInterest <- c("ts_diameter", "hu_diameter")
stormsDTNew[, lapply(.SD, mean), .SDcols = colsOfInterest, by = characterCols][1:5]
name status ts_diameter hu_diameter
1: Alex tropical depression 0.00000 0.0000
2: Alex tropical storm 150.80013 0.0000
3: Alex hurricane 304.40871 63.5669
4: Bonnie tropical depression 0.00000 0.0000
5: Bonnie tropical storm 75.40637 0.0000
.SD
: Subset of data - choose row by ordinal position
# irisDT[, .SD[1L]]
# irisDT[, .SD[1L], by = Species]
irisDT[, .SD[1L], by = Species, .SDcols = c("Petal.Length", "Petal.Width")]
Species Petal.Length Petal.Width
1: virginica 5.0 1.5
2: versicolor 3.5 1.0
3: setosa 1.3 0.3
.SD
: Subset of Data - select top entries (in each group)
irisDT[order(-Sepal.Length), .SD[1:2]]
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1: 7.9 3.8 6.4 2.0 virginica
2: 7.7 2.6 6.9 2.3 virginica
maxLength minWidth Sepal.Length.Width
1: 7.9 0.1 7.9/3.8
2: 7.9 0.1 7.7/2.6
Species Sepal.Length Sepal.Width
1: virginica 7.9 3.8
2: versicolor 7 3.2
3: setosa 5.8 4
irisDT[, text := {
if(Sepal.Length > 4 & Sepal.Width > 4) "sepal length and width is larger than 4"
else if(Petal.Length < 2 & Petal.Width < 2) "petal length and width is smaller than 2"
else "other"
}, by = .(Species, Sepal.Length, Sepal.Width, Petal.Length, Petal.Width)][, .SD[1], by = text][1:5]
text Sepal.Length Sepal.Width
1: other 6 2.2
2: petal length and width is smaller than 2 4.5 2.3
3: sepal length and width is larger than 4 5.2 4.1
4: <NA> <NA> <NA>
5: <NA> <NA> <NA>
Petal.Length Petal.Width Species maxLength minWidth
1: 5.0 1.5 virginica 7.9 0.1
2: 1.3 0.3 setosa 7.9 0.1
3: 1.5 0.1 setosa 7.9 0.1
4: NA NA <NA> NA NA
5: NA NA <NA> NA NA
Sepal.Length.Width
1: 6/2.2
2: 4.5/2.3
3: 5.2/4.1
4: <NA>
5: <NA>
bin_iris <- function(sl, sw, pl, pw){
if(sl > 4 & sw > 4) "sepal length and width is larger than 4"
else if(pl < 2 & pw < 2) "petal length and width is smaller than 2"
else "other"
}
irisDT[, text := bin_iris(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width),
by = .(Species, Sepal.Length, Sepal.Width, Petal.Length, Petal.Width)][, .SD[1], by = text][1:5]
text Sepal.Length Sepal.Width
1: other 6 2.2
2: petal length and width is smaller than 2 4.5 2.3
3: sepal length and width is larger than 4 5.2 4.1
4: <NA> <NA> <NA>
5: <NA> <NA> <NA>
Petal.Length Petal.Width Species maxLength minWidth
1: 5.0 1.5 virginica 7.9 0.1
2: 1.3 0.3 setosa 7.9 0.1
3: 1.5 0.1 setosa 7.9 0.1
4: NA NA <NA> NA NA
5: NA NA <NA> NA NA
Sepal.Length.Width
1: 6/2.2
2: 4.5/2.3
3: 5.2/4.1
4: <NA>
5: <NA>
Exercise 1
Print the fifth row for each species.
starwarsDT <- as.data.table(starwars)
starwarsDT[, if(.N >= 5) .SD[5L], by = species]
species name height mass hair_color skin_color
1: Human Beru Whitesun lars 165 75 brown light
2: Droid R4-P17 96 NA none silver, red
eye_color birth_year sex gender homeworld
1: blue 47 female feminine Tatooine
2: red, blue NA none feminine <NA>
films vehicles
1: Attack of the Clones,Revenge of the Sith,A New Hope
2: Attack of the Clones,Revenge of the Sith
starships
1:
2:
Exercise 2
stormsDT <- as.data.table(storms)
stormsDT[, .(name, status, pressure)][order(name, pressure)][, .SD[1:min(5, .N)], by = name][1:5]
name status pressure
1: AL011993 tropical depression 999
2: AL011993 tropical depression 999
3: AL011993 tropical depression 999
4: AL011993 tropical depression 999
5: AL011993 tropical depression 1000
For attribution, please cite this work as
Okola (2020, Dec. 4). Basil Okola: data munging with data.table. Retrieved from https://bokola214.netlify.app/posts/2020-12-04-data-munging-withdatatable/
BibTeX citation
@misc{okola2020data, author = {Okola, Basil}, title = {Basil Okola: data munging with data.table}, url = {https://bokola214.netlify.app/posts/2020-12-04-data-munging-withdatatable/}, year = {2020} }