thr3ads.net - R help - [R] conduct pairwise column comparisons without comparing a column to itself [Oct 2007]

If this information is useful, please help other people find it:
Share via:

Luke Neraas

2007-Oct-19 20:19 UTC

[R] conduct pairwise column comparisons without comparing a column to itself

# Hello
# I  have a question regarding pairwise calculations of a matrix using a
"for-loop."
# Below I have a matrix "X" with 8 columns. These are genotypic data
so
Column1 & Column2 is
# a unit, Column3 & Column4 is a unit, Column5 & Column6 is a unit,  and
Coulmn7 & 8 is a unit.
# I have a loop designed to calculate the number of times an individual in
Column"i" & Column"j"
# has the same value and the same individual has two values that are the
same in Column"k" & Column"l" .
# I have another seires of code that adds a 2 in the poper location of a
data frame called "result.df".
# I have written a loop that accomplishes this "pair of columns"
pairwise
comparison, but it also compares
# some of the "pairs of Columns" to themselves. Is there a way to get
around
this?


# creation of the data matrix
c1<- c(1,4,3,2,4,1,3,2,4,3)
c2<- c(2,4,3,4,4,3,4,1,3,2)
c3<- c(1,3,2,4,4,3,4,4,2,2)
c4<- c(2,3,2,3,1,3,2,4,4,3)
c5<- c(1,2,1,1,2,2,2,3,2,1)
c6<- c(3,2,4,3,1,1,2,3,3,4)
c7<- c(1,2,1,2,3,2,3,2,1,2)
c8<- c(1,2,2,3,2,3,3,4,1,2)

X<-cbind(c1,c2,c3,c4,c5,c6,c7,c8)

X

## Creation of the result dataframe
result<- matrix(0,16,2)
result.df<-data.frame(result)
result.df[,1] <- c(1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4)
result.df[,2] <- c(1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4)
names(result.df)[1]<-"L(A)a(i)"
names(result.df)[2]<-"L(B)a(j)"

result.df



### The loop written to find Double Homozygotes


for (a in seq(1,(ncol(X)-3), by=2)){
for (b in seq(3,(ncol(X)-1), by=2)){
for (i in a){
    j <- a+1
for (k in b){
    l <- b+1

    match.rows <- ((X [,i] == X [, j] ) &   ( X [,k] == X [, l]))

    double_homo_i <- X [match.rows, i]
    double_homo_k <- X [match.rows, k]

    double_homo<- cbind( double_homo_i, double_homo_k)
    double_homo.df<-data.frame(double_homo,Counts=2)
       names(double_homo.df)[1]<-"L(A)a(i)"
       names(double_homo.df)[2]<- "L(B)a(j)"


# Below takes each round throught he loop and puts in the result.dfdataframe.

count<-double_homo.df

almost.df<-aggregate(count$Counts, list(count[,1],count[,2]),
FUN=sum)

temp<-order(almost.df$Group.1)
final.df<-almost.df[temp,]
names(final.df)[1]<-"L(A)a(i)"
names(final.df)[2]<-"L(B)a(j)"

result.df<-merge(result.df,final.df,by=c("L(A)a(i)","L(B)a(j)"),
all.x=T)

             }
             }
             }
             }


# here is the result I get

result.df



#     L(A)a(i) L(B)a(j) C1C2~C3C4 C1C2~C5C6 C1C2~C7C8 C3C4~C3C4 C3C4~C5C6
C3C4~C7C8 C5C6~C3C4 C5C6~C5C6 C5C6~C7C8
#  1         1        1            NA             NA              NA
      NA              NA               NA               NA
NA              NA
#  2         1        2            NA             NA
NA               NA              NA               NA
NA              NA              NA
#  3         1        3            NA             NA
NA               NA              NA               NA
NA              NA              NA
#  4         1        4            NA             NA
NA               NA              NA               NA
NA              NA              NA
#  5         2        1            NA             NA
NA               NA              NA               NA
NA              NA              NA
#  6         2        2            NA             NA
NA                 2               NA               NA
NA                4                 2
#  7         2        3            NA             NA
NA               NA              NA               NA
2              NA                2
#  8         2        4            NA             NA
NA               NA              NA               NA
NA              NA              NA
#  9         3        1            NA             NA
NA               NA              NA               NA               NA
       NA              NA
#  10        3        2             2              NA              NA
        NA                2                  2
NA              NA              NA
#  11        3        3           NA             NA
NA                 4               NA               NA
NA                 2              NA
#  12        3        4           NA             NA
NA               NA              NA               NA
2               NA             NA
#  13        4        1           NA             NA
NA               NA              NA                NA
NA              NA             NA
#  14        4        2           NA               2
2                NA             NA                NA
NA              NA             NA
#  15        4        3             2             NA
NA               NA               2                 NA
NA              NA             NA
#  16        4        4           NA            NA
NA                  2             NA                NA
NA              NA             NA





# Here is the Result I am looking for.

     L(A)a(i) L(B)a(j) C1C2~C3C4 C1C2~C5C6 C1C2~C7C8 C3C4~C5C6 C3C4~C7C8
C5C6~C7C8
# 1         1        1          NA              NA               NA
      NA              NA              NA
# 2         1        2          NA              NA
NA               NA              NA              NA
# 3         1        3          NA              NA
NA               NA              NA              NA
# 4         1        4          NA              NA
NA               NA              NA              NA
# 5         2        1          NA              NA
NA               NA              NA              NA
# 6         2        2          NA              NA
NA               NA              NA                2
# 7         2        3          NA              NA
NA               NA              NA                2
# 8         2        4          NA              NA
NA               NA              NA              NA
# 9         3        1          NA              NA               NA
      NA              NA              NA
# 10        3        2           2               NA
NA                 2                 2               NA
# 11        3        3         NA              NA
NA               NA              NA               NA
# 12        3        4         NA              NA
NA               NA              NA               NA
# 13        4        1         NA              NA
NA               NA              NA               NA
# 14        4        2         NA                2                  2
             NA               NA               NA
# 15        4        3           2              NA
NA                 2                NA               NA
# 16        4        4         NA             NA
NA               NA               NA                NA


# Any help or ideas would be greatly appreciated

# Thanks in advance

# Luke Neraas

# lukasneraas.r@gmail.com

# University of Alaska Fairbanks
# School of Fisheries and Ocean Sciences
# 11120 Glacier Highway
# UAF Fisheries Division
# Juneau, AK 99801

	[[alternative HTML version deleted]]

jim holtman

2007-Oct-19 23:20 UTC

head link

[R] Conduct pairwise column comparisons without comparing a column to itself

A little different solution, but it gives you the matches and the
columns in a more compact form.  You can always take the data and use
it to put into your array.
> # creation of the data matrix
> c1<- c(1,4,3,2,4,1,3,2,4,3)
> c2<- c(2,4,3,4,4,3,4,1,3,2)
> c3<- c(1,3,2,4,4,3,4,4,2,2)
> c4<- c(2,3,2,3,1,3,2,4,4,3)
> c5<- c(1,2,1,1,2,2,2,3,2,1)
> c6<- c(3,2,4,3,1,1,2,3,3,4)
>
>
> X<-cbind(c1,c2,c3,c4,c5,c6)
>
>
>
> # initialize a matrix with T/F for same values
> same <- matrix(FALSE, ncol=ncol(X) / 2, nrow=nrow(X))
> # set the values
> for (i in 1:ncol(same)) same[,i] <- X[, 2*i-1] == X[, 2*i]
>
> # get all possible combinations of numbers for accessing the matrix
> cbn <- combn(ncol(same), 2) # combinations take 2 at a time
> cbn  # see what it looks like     [,1] [,2] [,3]
[1,]    1    1    2
[2,]    2    3    3>
> # use this to interate through using 'lapply' since it returns
value
> values <- lapply(1:ncol(cbn), function(.col){ # similar to
'for', but better+     match <- which(same[, cbn[1, .col]] & same[, cbn[2, .col]])
+     if (length(match) == 0) return(NULL)  # no matches
+     # now return the values
+     cbind(LA=X[match, 2 * cbn[1, .col]],
+           LB=X[match, 2 * cbn[2, .col]],
+           col1=cbn[1, .col],
+           col2=cbn[2, .col])
+ })> X      c1 c2 c3 c4 c5 c6
 [1,]  1  2  1  2  1  3
 [2,]  4  4  3  3  2  2
 [3,]  3  3  2  2  1  4
 [4,]  2  4  4  3  1  3
 [5,]  4  4  4  1  2  1
 [6,]  1  3  3  3  2  1
 [7,]  3  4  4  2  2  2
 [8,]  2  1  4  4  3  3
 [9,]  4  3  2  4  2  3
[10,]  3  2  2  3  1  4> (values <- do.call('rbind', values))     LA LB col1 col2
[1,]  4  3    1    2
[2,]  3  2    1    2
[3,]  4  2    1    3
[4,]  3  2    2    3
[5,]  4  3    2    3>

On 10/19/07, Luke Neraas <lukasneraas.r at gmail.com>
wrote:> #Hi Jim,
> # here is a simpler version of my puzzle
> # I have added a bit of explanation near the bottom of this puzzle
> # I apologize for the confusion and sloppiness earlier.
>
>
> # I  have a question regarding pairwise calculations of a matrix using a
> "for-loop."
> # Below I have a matrix "X" with 6 columns. These are Genotypic
data so
> Column1 & Column2 is
> # a unit, Column3 & Column4 is a unit, Column5 & Column6 is a unit,
> # I have a loop designed to calculate the number of times an individual in
> Column"i" & Column"j"
> # has the same value and the same individual has two values that are the
> same in Column"k" & Column"l" .
> # I have another series of code that adds a 2 to a specific location in a
> results data frame called " result.df".
> # I have written a loop that accomplishes this "pair of columns"
pairwise
> comparison, but it also compares
> # some of the "pairs of Columns" to themselves. Is there a way to
get around
> this?
>
>
> # creation of the data matrix
> c1<- c(1,4,3,2,4,1,3,2,4,3)
> c2<- c(2,4,3,4,4,3,4,1,3,2)
> c3<- c(1,3,2,4,4,3,4,4,2,2)
> c4<- c(2,3,2,3,1,3,2,4,4,3)
> c5<- c(1,2,1,1,2,2,2,3,2,1)
> c6<- c(3,2,4,3,1,1,2,3,3,4)
>
>
> X<-cbind(c1,c2,c3,c4,c5,c6)
>
> X
>
> ## Creation of the result dataframe
> result<- matrix(0,16,2)
> result.df<-data.frame(result)
> result.df[,1] <- c(1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4)
> result.df[,2] <- c(1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4)
> names(result.df)[1]<-"L(A)a(ij)"
> names(result.df)[2]<-"L(B)a(kl)"
>
> result.df
>
>
>
> ### The loop written to find Double Homozygotes
>
>
> for (i in seq(1,(ncol(X)-3), by=2)){
>     j <- i+1
> for (k in seq(3,(ncol(X)-1), by=2)){
>     l <- k+1
>
>     match.rows <- ((X [,i] == X [, j] ) &   ( X [,k] == X [, l]))
>
>     double_homo_i <- X [match.rows, i]
>     double_homo_k <- X [match.rows, k]
>
>     double_homo<- cbind( double_homo_i, double_homo_k)
>     double_homo.df<-data.frame(double_homo,Counts=2)
>        names(double_homo.df)[1]<-"L(A)a(ij)"
>        names(double_homo.df)[2]<- "L(B)a(kl)"
>
>
> # Below takes the result from each loop and puts in the result.df
dataframe.
>
> count<-double_homo.df
>
> almost.df<-aggregate(count$Counts, list(count[,1],count[,2]),
> FUN=sum)
>
> temp<-order(almost.df$Group.1)
> final.df<-almost.df[temp,]
> names(final.df)[1]<-"L(A)a(ij)"
> names(final.df )[2]<-"L(B)a(kl)"
>
>
result.df<-merge(result.df,final.df,by=c("L(A)a(ij)","L(B)a(kl)"),
all.x=T)
>
>              }
>              }
>
>
>
> # Below are the result I get with the code above.
>
> result.df
>
>
>
> #     L(A)a(ij) L(B)a(kl) C1C2~C3C4 C1C2~C5C6 C3C4~C3C4 C3C4~C5C6
> # 1         1        1               NA              NA             NA
>         NA
> # 2         1        2               NA              NA             NA
>         NA
> # 3         1        3               NA              NA             NA
>         NA
> # 4         1        4               NA              NA             NA
>         NA
> # 5         2        1               NA              NA             NA
>         NA
> # 6         2        2               NA              NA               2
>           NA
> # 7         2        3               NA              NA             NA
>         NA
> # 8         2        4               NA              NA             NA
>         NA
> # 9         3        1               NA              NA             NA
>         NA
> # 10       3        2                 2               NA             NA
>            2
> # 11       3        3               NA              NA                4
>          NA
> # 12       3        4               NA              NA             NA
>        NA
> # 13       4        1               NA              NA             NA
>        NA
> # 14       4        2               NA                2              NA
>          NA
> # 15       4        3                 2               NA             NA
>            2
> # 16       4        4               NA              NA                2
>          NA
>
> # The first column in result.df is the value of the number (1-4) in a the
> first "column pair" comparison from "X" that has the
same value in a row.
> # The second column in result.df is the value of the number (1-4) in a
> "column pair" comparison from "X" that has the same
value in a row for that
> # column pair.
> # The third column in result.df has the value 2 added to the data.frame if
> the condition is met.
> # for example in :X" Col1 & Col2 row 3 has a "3 3" and
Col3 & Col4 has a "2
> 2" in row three. Therefore the result.df$C1C2~C3C4 has a 2 added to
> # the row where results.df$L(A)a(ij)=3 and results.df$L(B)a(kl)=2.
> # My major problem stems from having "Column pairs" compared to
themselves,
> such as result.df$C3C4~C3C4   are the results from
> # X[,3:4] compared to itself.
> # is there way to write the loop so these "Column Pairs" are not
compared to
> themselves.
> # Perhaps a change in the code for my loop :
> #                        for (i in seq(1,(ncol(X)-3), by=2)){
> #                        j <- i+1
> #                        for (k in seq(3,(ncol(X)-1), by=2)){
> #                        l <- k+1
>
>
>
> # Here is the Result I am looking for.
>
>      L(A)a(ij) L(B)a(kl) C1C2~C3C4 C1C2~C5C6 C3C4~C5C6
> # 1         1        1             NA             NA              NA
> # 2         1        2             NA             NA              NA
> # 3         1        3             NA             NA              NA
> # 4         1        4             NA             NA              NA
> # 5         2        1             NA             NA              NA
> # 6         2        2             NA             NA              NA
> # 7         2        3             NA             NA              NA
> # 8         2        4             NA             NA              NA
> # 9         3        1             NA             NA              NA
> # 10        3        2              2              NA                2
> # 11        3        3            NA             NA              NA
> # 12        3        4            NA             NA              NA
> # 13        4        1            NA             NA              NA
> # 14        4        2            NA               2               NA
> # 15        4        3              2             NA                 2
> # 16        4        4            NA            NA               NA
>
>
> # Any help or ideas would be greatly appreciated
>
> # Thanks in advance
>
> # Luke Neraas
>
> # lukasneraas.r at gmail.com
>
> # University of Alaska Fairbanks
> # School of Fisheries and Ocean Sciences
> # 11120 Glacier Highway
> # UAF Fisheries Division
> # Juneau, AK 99801
>
>
>
>

-- 
Jim Holtman
Cincinnati, OH
+1 513 646 9390

What is the problem you are trying to solve?

Apparently Analagous Threads

Search for more apparently analagous threads

R help - Oct 2007 - conduct pairwise column comparisons without comparing a column to itself

[R] conduct pairwise column comparisons without comparing a column to itself

[R] Conduct pairwise column comparisons without comparing a column to itself

Apparently Analagous Threads