Kelly Thompson
2022-Oct-21 22:23 UTC
[R] getting data from a "vertical" table into a "2-dimensional" grid
# I think this might be a better example.
# I have data presented in a "vertical" dataframe as shown below in
data_original.
# I want this data in a matrix or "grid", as shown below.
# What I show below seems like one way this can be done.
# My question: Are there easier or better ways to do this, especially
in Base R, and also in R packages?
#create data
set.seed(1)
data_original <- data.frame(year = rep(1990:1999, length = 50),
category = sample(1:5, size = 50, replace = TRUE), sales sample(0:99999, size =
50 , replace = TRUE) )
dim(data_original)
#remove rows where data_original$year == 1990 & data_original$category
== 5, to ensure there is at least one NA in the "grid"
data_original <- data_original[ (data_original$year == 1990 &
data_original$category == 5) == FALSE, ]
dim(data_original)
#aggregate data
data_aggregate_sum_by_year_and_category <- aggregate(x data_original$sales,
by = list(year = data_original$year, category data_original$category), FUN =
sum)
colnames(data_aggregate_sum_by_year_and_category) <- c('year',
'category', 'sum_of_sales')
dim(data_aggregate_sum_by_year_and_category)
data_expanded <- expand.grid(year
unique(data_aggregate_sum_by_year_and_category$year), category
unique(data_aggregate_sum_by_year_and_category$category))
dim(data_expanded)
data_expanded <- merge(data_expanded,
data_aggregate_sum_by_year_and_category, all = TRUE)
dim(data_expanded)
mat <- matrix(data = data_expanded$sum_of_sales, nrow
length(unique(data_expanded$year)), ncol length(unique(data_expanded$category))
, byrow = TRUE, dimnames list( unique(data_expanded$year),
unique(data_expanded$category) ) )
data_original
data_expanded
mat
On Fri, Oct 21, 2022 at 5:03 PM Kelly Thompson <kt1572757 at gmail.com>
wrote:>
> ###
> #I have data presented in a "vertical" data frame as shown below
in
> data_original.
> #I want this data in a matrix or "grid", as shown below.
> #What I show below seems like one way this can be done.
>
> #My question: Are there easier or better ways to do this, especially
> in Base R, and also in R packages?
>
> #reproducible example
>
> data_original <- data.frame(year = c('1990', '1999',
'1990', '1989'),
> size = c('s', 'l', 'xl', 'xs'), n = c(99,
33, 3, 4) )
>
> data_expanded <- expand.grid(unique(data_original$year),
> unique(data_original$size), stringsAsFactors = FALSE )
> colnames(data_expanded) <- c('year', 'size')
> data_expanded <- merge(data_expanded, data_original, all = TRUE)
>
> mat <- matrix(data = data_expanded $n, nrow >
length(unique(data_expanded $year)), ncol > length(unique(data_expanded
$size)) , byrow = TRUE, dimnames = list(
> unique(data_expanded$year), unique(data_expanded$size) ) )
>
> data_original
> data_expanded
> mat
Kelly Thompson
2022-Oct-21 22:32 UTC
[R] getting data from a "vertical" table into a "2-dimensional" grid
As my end result, I want a matrix or data frame, with one row for each year, and one column for each category. On Fri, Oct 21, 2022 at 6:23 PM Kelly Thompson <kt1572757 at gmail.com> wrote:> > # I think this might be a better example. > > # I have data presented in a "vertical" dataframe as shown below in > data_original. > # I want this data in a matrix or "grid", as shown below. > # What I show below seems like one way this can be done. > > # My question: Are there easier or better ways to do this, especially > in Base R, and also in R packages? > > #create data > set.seed(1) > data_original <- data.frame(year = rep(1990:1999, length = 50), > category = sample(1:5, size = 50, replace = TRUE), sales > sample(0:99999, size = 50 , replace = TRUE) ) > dim(data_original) > > #remove rows where data_original$year == 1990 & data_original$category > == 5, to ensure there is at least one NA in the "grid" > data_original <- data_original[ (data_original$year == 1990 & > data_original$category == 5) == FALSE, ] > dim(data_original) > > #aggregate data > data_aggregate_sum_by_year_and_category <- aggregate(x > data_original$sales, by = list(year = data_original$year, category > data_original$category), FUN = sum) > colnames(data_aggregate_sum_by_year_and_category) <- c('year', > 'category', 'sum_of_sales') > dim(data_aggregate_sum_by_year_and_category) > > data_expanded <- expand.grid(year > unique(data_aggregate_sum_by_year_and_category$year), category > unique(data_aggregate_sum_by_year_and_category$category)) > dim(data_expanded) > data_expanded <- merge(data_expanded, > data_aggregate_sum_by_year_and_category, all = TRUE) > dim(data_expanded) > > mat <- matrix(data = data_expanded$sum_of_sales, nrow > length(unique(data_expanded$year)), ncol > length(unique(data_expanded$category)) , byrow = TRUE, dimnames > list( unique(data_expanded$year), unique(data_expanded$category) ) ) > > > data_original > data_expanded > mat > > On Fri, Oct 21, 2022 at 5:03 PM Kelly Thompson <kt1572757 at gmail.com> wrote: > > > > ### > > #I have data presented in a "vertical" data frame as shown below in > > data_original. > > #I want this data in a matrix or "grid", as shown below. > > #What I show below seems like one way this can be done. > > > > #My question: Are there easier or better ways to do this, especially > > in Base R, and also in R packages? > > > > #reproducible example > > > > data_original <- data.frame(year = c('1990', '1999', '1990', '1989'), > > size = c('s', 'l', 'xl', 'xs'), n = c(99, 33, 3, 4) ) > > > > data_expanded <- expand.grid(unique(data_original$year), > > unique(data_original$size), stringsAsFactors = FALSE ) > > colnames(data_expanded) <- c('year', 'size') > > data_expanded <- merge(data_expanded, data_original, all = TRUE) > > > > mat <- matrix(data = data_expanded $n, nrow > > length(unique(data_expanded $year)), ncol > > length(unique(data_expanded $size)) , byrow = TRUE, dimnames = list( > > unique(data_expanded$year), unique(data_expanded$size) ) ) > > > > data_original > > data_expanded > > mat