Kathan Desai
2020-Jul-03 20:56 UTC
[R] Help with looping a function over a list of dataframes:
I have been trying to run a for loop for a function that compares dataframe n with dataframe n-1, across a list of dataframes. It does this by checking each midpoint of dataframe n with each midpoint of dataframe n-1. This is done to make up for any disparity in row length. The idea of this code is to identify any objects that are stationary, and assign them an id of 1, and the dynamic objects are assigned an id of 0 (examples can be found below). *This is what I have so far:* for(i in seq_along(list_df)){ list_df$position_tab_[[i]]$ID <- unlist(lapply(list_df$position_tab_[[i]]$midpoint, function(x) ifelse(any(abs(x - list_df$position_tab_[[i-1]]$midpoint) <= 1),1,0) )) } There is no error message being produced, so there's nothing to debug. I am quite new to R programming in general, so excuse any silly mistakes I may have made. The function doesn't seem to be adding the ID columns and comparing the data as it should. My list of dataframes contains dataframes named: position_tab_1, position_tab_2 .... position_tab_121. Each position_tab represents a timepoint, so in total there are 121 timepoints (frames). I need the loop to run so that pos_tab_2 compares to pos_tab_1 and this continues all the way to pos_tab_121 comparing to pos_tab_120. The function adds a column named "ID" to each of these dataframes as it compares to the dataframe before it, so all dataframes apart from position_tab_1 (as it has nothing to compare to) should have this added. 
*Some of my data (first 10 dataframes in list):*> dput(list_df[1:10])list(position_tab_1 = structure(list(Object = c(2666L, 2668L, 2671L, 2674L, 2676L, 2677L, 2678L, 2679L, 2680L, 2682L, 2683L, 2684L, 2685L, 2686L, 2687L, 2689L, 2692L, 2693L, 2694L, 2695L, 2696L), minimum = c(4L, 39L, 147L, 224L, 419L, 531L, 595L, 641L, 669L, 723L, 810L, 836L, 907L, 978L, 1061L, 1129L, 1290L, 1519L, 1749L, 1843L, 1897L), maximum = c(22L, 85L, 173L, 242L, 449L, 587L, 627L, 655L, 702L, 740L, 828L, 890L, 923L, 1024L, 1086L, 1144L, 1302L, 1544L, 1780L, 1870L, 1925L), midpoint = c(13, 62, 160, 233, 434, 559, 611, 648, 685.5, 731.5, 819, 863, 915, 1001, 1073.5, 1136.5, 1296, 1531.5, 1764.5, 1856.5, 1911)), row.names = c(NA, -21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_2 structure(list( Object = c(2645L, 2646L, 2650L, 2652L, 2655L, 2656L, 2657L, 2658L, 2659L, 2661L, 2662L, 2663L, 2664L, 2665L, 2667L, 2670L, 2675L, 2681L, 2688L, 2690L, 2691L), minimum = c(4L, 40L, 147L, 224L, 415L, 532L, 595L, 641L, 670L, 722L, 811L, 835L, 907L, 978L, 1061L, 1128L, 1289L, 1520L, 1748L, 1843L, 1897L ), maximum = c(22L, 85L, 173L, 242L, 445L, 588L, 627L, 655L, 702L, 739L, 828L, 891L, 923L, 1022L, 1085L, 1143L, 1302L, 1544L, 1779L, 1870L, 1925L), midpoint = c(13, 62.5, 160, 233, 430, 560, 611, 648, 686, 730.5, 819.5, 863, 915, 1000, 1073, 1135.5, 1295.5, 1532, 1763.5, 1856.5, 1911)), row.names = c(NA, -21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_3 structure(list( Object = c(2623L, 2624L, 2627L, 2631L, 2633L, 2635L, 2636L, 2637L, 2638L, 2640L, 2641L, 2642L, 2643L, 2644L, 2647L, 2649L, 2654L, 2660L, 2669L, 2672L, 2673L), minimum = c(3L, 39L, 149L, 223L, 402L, 539L, 594L, 639L, 669L, 722L, 811L, 834L, 907L, 979L, 1060L, 1129L, 1289L, 1520L, 1749L, 1842L, 1897L ), maximum = c(22L, 86L, 175L, 241L, 431L, 587L, 627L, 653L, 700L, 738L, 828L, 894L, 925L, 1021L, 1084L, 1144L, 1302L, 1544L, 1779L, 1869L, 1925L), midpoint = c(12.5, 62.5, 162, 232, 416.5, 563, 610.5, 646, 684.5, 730, 
819.5, 864, 916, 1000, 1072, 1136.5, 1295.5, 1532, 1764, 1855.5, 1911)), row.names c(NA, -21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_4 structure(list( Object = c(2600L, 2604L, 2606L, 2609L, 2611L, 2613L, 2614L, 2615L, 2617L, 2618L, 2619L, 2620L, 2621L, 2622L, 2626L, 2628L, 2632L, 2639L, 2648L, 2651L, 2653L), minimum = c(4L, 42L, 142L, 223L, 393L, 547L, 595L, 641L, 669L, 720L, 809L, 835L, 908L, 979L, 1059L, 1127L, 1289L, 1519L, 1749L, 1841L, 1897L ), maximum = c(22L, 85L, 172L, 241L, 421L, 587L, 629L, 655L, 701L, 738L, 826L, 890L, 925L, 1019L, 1084L, 1143L, 1301L, 1544L, 1780L, 1868L, 1925L), midpoint = c(13, 63.5, 157, 232, 407, 567, 612, 648, 685, 729, 817.5, 862.5, 916.5, 999, 1071.5, 1135, 1295, 1531.5, 1764.5, 1854.5, 1911)), row.names = c(NA, -21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_5 structure(list( Object = c(2580L, 2581L, 2585L, 2586L, 2589L, 2590L, 2592L, 2593L, 2594L, 2595L, 2596L, 2597L, 2598L, 2599L, 2601L, 2603L, 2605L, 2610L, 2616L, 2625L, 2629L, 2630L, 2634L), minimum = c(3L, 43L, 132L, 223L, 391L, 547L, 643L, 669L, 721L, 808L, 836L, 861L, 908L, 978L, 1028L, 1057L, 1127L, 1288L, 1519L, 1748L, 1842L, 1893L, 2148L), maximum = c(21L, 85L, 168L, 241L, 419L, 629L, 656L, 701L, 738L, 828L, 858L, 890L, 925L, 1013L, 1050L, 1083L, 1143L, 1301L, 1544L, 1780L, 1869L, 1925L, 2148L), midpoint = c(12, 64, 150, 232, 405, 588, 649.5, 685, 729.5, 818, 847, 875.5, 916.5, 995.5, 1039, 1070, 1135, 1294.5, 1531.5, 1764, 1855.5, 1909, 2148)), row.names = c(NA, -23L ), class = c("tbl_df", "tbl", "data.frame")), position_tab_6 structure(list( Object = c(2555L, 2559L, 2562L, 2563L, 2564L, 2567L, 2569L, 2570L, 2571L, 2572L, 2573L, 2574L, 2575L, 2576L, 2577L, 2579L, 2583L, 2587L, 2591L, 2602L, 2607L, 2608L, 2612L), minimum = c(4L, 45L, 123L, 154L, 224L, 390L, 546L, 600L, 643L, 669L, 720L, 804L, 836L, 908L, 967L, 1058L, 1129L, 1289L, 1519L, 1748L, 1843L, 1893L, 2147L), maximum = c(23L, 86L, 150L, 171L, 241L, 419L, 589L, 636L, 657L, 701L, 
738L, 827L, 879L, 925L, 1011L, 1084L, 1144L, 1301L, 1543L, 1780L, 1871L, 1924L, 2148L), midpoint = c(13.5, 65.5, 136.5, 162.5, 232.5, 404.5, 567.5, 618, 650, 685, 729, 815.5, 857.5, 916.5, 989, 1071, 1136.5, 1295, 1531, 1764, 1857, 1908.5, 2147.5)), row.names = c(NA, -23L), class = c("tbl_df", "tbl", "data.frame")), position_tab_7 structure(list( Object = c(2537L, 2539L, 2540L, 2541L, 2542L, 2544L, 2546L, 2547L, 2548L, 2549L, 2550L, 2551L, 2552L, 2554L, 2556L, 2558L, 2560L, 2565L, 2568L, 2578L, 2582L, 2584L, 2588L), minimum = c(3L, 45L, 122L, 156L, 224L, 387L, 546L, 601L, 669L, 719L, 803L, 837L, 908L, 959L, 1059L, 1096L, 1128L, 1289L, 1519L, 1748L, 1844L, 1892L, 2147L), maximum = c(22L, 86L, 147L, 172L, 241L, 415L, 590L, 656L, 699L, 738L, 830L, 871L, 924L, 1014L, 1082L, 1119L, 1144L, 1301L, 1543L, 1781L, 1872L, 1925L, 2148L), midpoint = c(12.5, 65.5, 134.5, 164, 232.5, 401, 568, 628.5, 684, 728.5, 816.5, 854, 916, 986.5, 1070.5, 1107.5, 1136, 1295, 1531, 1764.5, 1858, 1908.5, 2147.5)), row.names = c(NA, -23L), class = c("tbl_df", "tbl", "data.frame")), position_tab_8 structure(list( Object = c(2514L, 2517L, 2519L, 2520L, 2521L, 2523L, 2525L, 2526L, 2527L, 2528L, 2529L, 2530L, 2531L, 2532L, 2533L, 2534L, 2536L, 2543L, 2545L, 2553L, 2557L, 2561L, 2566L), minimum = c(5L, 44L, 121L, 153L, 224L, 380L, 546L, 603L, 668L, 721L, 802L, 841L, 907L, 960L, 1006L, 1060L, 1106L, 1288L, 1518L, 1748L, 1843L, 1893L, 2148L), maximum = c(23L, 86L, 146L, 170L, 242L, 409L, 588L, 655L, 699L, 738L, 830L, 872L, 924L, 994L, 1029L, 1084L, 1143L, 1302L, 1543L, 1781L, 1870L, 1925L, 2148L), midpoint = c(14, 65, 133.5, 161.5, 233, 394.5, 567, 629, 683.5, 729.5, 816, 856.5, 915.5, 977, 1017.5, 1072, 1124.5, 1295, 1530.5, 1764.5, 1856.5, 1909, 2148)), row.names = c(NA, -23L), class = c("tbl_df", "tbl", "data.frame")), position_tab_9 structure(list( Object = c(2492L, 2493L, 2497L, 2498L, 2499L, 2501L, 2503L, 2504L, 2505L, 2506L, 2507L, 2508L, 2509L, 2510L, 2511L, 2513L, 2516L, 2522L, 2524L, 2532L, 
2535L, 2538L), minimum = c(6L, 44L, 111L, 149L, 224L, 375L, 548L, 596L, 668L, 722L, 800L, 840L, 908L, 960L, 1005L, 1058L, 1127L, 1289L, 1519L, 1748L, 1842L, 1891L), maximum = c(24L, 81L, 137L, 167L, 242L, 403L, 589L, 656L, 699L, 738L, 828L, 872L, 925L, 994L, 1028L, 1081L, 1149L, 1302L, 1544L, 1780L, 1868L, 1924L), midpoint = c(15, 62.5, 124, 158, 233, 389, 568.5, 626, 683.5, 730, 814, 856, 916.5, 977, 1016.5, 1069.5, 1138, 1295.5, 1531.5, 1764, 1855, 1907.5)), row.names = c(NA, -22L), class = c("tbl_df", "tbl", "data.frame")), position_tab_10 = structure(list(Object = c(2469L, 2471L, 2474L, 2475L, 2476L, 2478L, 2481L, 2482L, 2483L, 2484L, 2485L, 2486L, 2487L, 2488L, 2489L, 2491L, 2495L, 2500L, 2502L, 2512L, 2515L, 2518L), minimum = c(6L, 38L, 109L, 147L, 223L, 363L, 548L, 597L, 668L, 719L, 803L, 839L, 908L, 958L, 1004L, 1058L, 1126L, 1288L, 1519L, 1746L, 1841L, 1892L), maximum = c(24L, 76L, 134L, 165L, 240L, 394L, 591L, 656L, 698L, 737L, 829L, 869L, 924L, 996L, 1027L, 1081L, 1147L, 1301L, 1543L, 1781L, 1868L, 1925L), midpoint = c(15, 57, 121.5, 156, 231.5, 378.5, 569.5, 626.5, 683, 728, 816, 854, 916, 977, 1015.5, 1069.5, 1136.5, 1294.5, 1531, 1763.5, 1854.5, 1908.5)), row.names = c(NA, -22L ), class = c("tbl_df", "tbl", "data.frame"))) *What is produced when running the base code without any loops:* This is the base code without me trying to loop it in anyway, below is what it produces when its used with dataframe 4 and 5: #the code: list_df$position_tab_5$ID <- unlist(lapply(list_df$position_tab_5$midpoint, function(x) ifelse(any(abs(x - list_df$position_tab_4$midpoint) <= 1),1,0))) ##position_tab_5 after manipulations have occured: structure(list(Object = c(2580L, 2581L, 2585L, 2586L, 2589L, 2590L, 2592L, 2593L, 2594L, 2595L, 2596L, 2597L, 2598L, 2599L, 2601L, 2603L, 2605L, 2610L, 2616L, 2625L, 2629L, 2630L, 2634L ), minimum = c(3L, 43L, 132L, 223L, 391L, 547L, 643L, 669L, 721L, 808L, 836L, 861L, 908L, 978L, 1028L, 1057L, 1127L, 1288L, 1519L, 1748L, 1842L, 
1893L, 2148L), maximum = c(21L, 85L, 168L, 241L, 419L, 629L, 656L, 701L, 738L, 828L, 858L, 890L, 925L, 1013L, 1050L, 1083L, 1143L, 1301L, 1544L, 1780L, 1869L, 1925L, 2148L ), midpoint = c(12, 64, 150, 232, 405, 588, 649.5, 685, 729.5, 818, 847, 875.5, 916.5, 995.5, 1039, 1070, 1135, 1294.5, 1531.5, 1764, 1855.5, 1909, 2148), ID = c(1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0)), row.names = c(NA, -23L), class = c("tbl_df", "tbl", "data.frame")) #position_tab_4 (the DF pos_tab_5 is being compared to) structure(list(Object = c(2600L, 2604L, 2606L, 2609L, 2611L, 2613L, 2614L, 2615L, 2617L, 2618L, 2619L, 2620L, 2621L, 2622L, 2626L, 2628L, 2632L, 2639L, 2648L, 2651L, 2653L), minimum = c(4L, 42L, 142L, 223L, 393L, 547L, 595L, 641L, 669L, 720L, 809L, 835L, 908L, 979L, 1059L, 1127L, 1289L, 1519L, 1749L, 1841L, 1897L), maximum = c(22L, 85L, 172L, 241L, 421L, 587L, 629L, 655L, 701L, 738L, 826L, 890L, 925L, 1019L, 1084L, 1143L, 1301L, 1544L, 1780L, 1868L, 1925L), midpoint = c(13, 63.5, 157, 232, 407, 567, 612, 648, 685, 729, 817.5, 862.5, 916.5, 999, 1071.5, 1135, 1295, 1531.5, 1764.5, 1854.5, 1911)), row.names = c(NA, -21L), class = c("tbl_df", "tbl", "data.frame")) *Appreciate any help, anyone can provide!* [[alternative HTML version deleted]]
Jim Lemon
2020-Jul-04 10:14 UTC
[R] Help with looping a function over a list of dataframes:
Hi Kathan, This is a very lazy answer as I haven't tested it. I think you will need to wrap your loop in a function and return the modified list_df to assign it like this: add_IDs<-function(xdf) { for(i in seq_along(xdf)) { xdf$position_tab_[[i]]$ID <- unlist(lapply(xdf$position_tab_[[i]]$midpoint, function(x) ifelse(any(abs(x - xdf$position_tab_[[i-1]]$midpoint)<= 1),1,0))) } return(xdf) } list_df<-add_IDs(list_df) Jim On Sat, Jul 4, 2020 at 4:48 PM Kathan Desai <kdesai1 at sheffield.ac.uk> wrote:> > I have been trying to run a forloop for a function that compares dataframe n > with dataframe n-1, across a list of dataframes. It does this by checking > each midpoint of dataframe n with each midpoint of dataframe n-1. This is > done to make up for an disparity in row length. The idea of this code is to > identify any objects that are stationary, and assign them an id of 1, and > the dynamic objects are assigned an id of 0 (examples can be found below). > > > *This is what i have so far:* > for(i in seq_along(list_df)){ > list_df$position_tab_[[i]]$ID <- > unlist(lapply(list_df$position_tab_[[i]]$midpoint, function(x) > ifelse(any(abs(x - list_df$position_tab_[[i-1]]$midpoint) > <= 1),1,0) > )) > } > > There is no error message being produced so theres nothing to debug, i am > quite new to R programming in general so excuse any silly mistakes i may > have made. The function doesnt seem to be adding the ID columns and > comparing the data as it should. > > my list of dataframes contain dataframes named: position_tab_1, > position_tab_2 .... position_tab_121. Each position_tab represents a > timepoints, so in total there are 121 timepoints (frames). I need the loop > to run so that pos_tab_2 compares to pos_tab_1 and this continues all the > way to pos_tab_121 comparing to pos_tab_120. 
> > The function adds a column named "id" to each of these dataframes as it > compares to the dataframe before it, so all dataframes apart from > position_tab_1 (as it has nothing to compare to) should have this added. > > > *Some of my data (first 10 dataframes in list):* > > dput(list_df[1:10]) > list(position_tab_1 = structure(list(Object = c(2666L, 2668L, > 2671L, 2674L, 2676L, 2677L, 2678L, 2679L, 2680L, 2682L, 2683L, > 2684L, 2685L, 2686L, 2687L, 2689L, 2692L, 2693L, 2694L, 2695L, > 2696L), minimum = c(4L, 39L, 147L, 224L, 419L, 531L, 595L, 641L, > 669L, 723L, 810L, 836L, 907L, 978L, 1061L, 1129L, 1290L, 1519L, > 1749L, 1843L, 1897L), maximum = c(22L, 85L, 173L, 242L, 449L, > 587L, 627L, 655L, 702L, 740L, 828L, 890L, 923L, 1024L, 1086L, > 1144L, 1302L, 1544L, 1780L, 1870L, 1925L), midpoint = c(13, 62, > 160, 233, 434, 559, 611, 648, 685.5, 731.5, 819, 863, 915, 1001, > 1073.5, 1136.5, 1296, 1531.5, 1764.5, 1856.5, 1911)), row.names = c(NA, > -21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_2 > structure(list( > Object = c(2645L, 2646L, 2650L, 2652L, 2655L, 2656L, 2657L, > 2658L, 2659L, 2661L, 2662L, 2663L, 2664L, 2665L, 2667L, 2670L, > 2675L, 2681L, 2688L, 2690L, 2691L), minimum = c(4L, 40L, > 147L, 224L, 415L, 532L, 595L, 641L, 670L, 722L, 811L, 835L, > 907L, 978L, 1061L, 1128L, 1289L, 1520L, 1748L, 1843L, 1897L > ), maximum = c(22L, 85L, 173L, 242L, 445L, 588L, 627L, 655L, > 702L, 739L, 828L, 891L, 923L, 1022L, 1085L, 1143L, 1302L, > 1544L, 1779L, 1870L, 1925L), midpoint = c(13, 62.5, 160, > 233, 430, 560, 611, 648, 686, 730.5, 819.5, 863, 915, 1000, > 1073, 1135.5, 1295.5, 1532, 1763.5, 1856.5, 1911)), row.names = c(NA, > -21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_3 > structure(list( > Object = c(2623L, 2624L, 2627L, 2631L, 2633L, 2635L, 2636L, > 2637L, 2638L, 2640L, 2641L, 2642L, 2643L, 2644L, 2647L, 2649L, > 2654L, 2660L, 2669L, 2672L, 2673L), minimum = c(3L, 39L, > 149L, 223L, 402L, 539L, 594L, 639L, 669L, 722L, 811L, 
834L, > 907L, 979L, 1060L, 1129L, 1289L, 1520L, 1749L, 1842L, 1897L > ), maximum = c(22L, 86L, 175L, 241L, 431L, 587L, 627L, 653L, > 700L, 738L, 828L, 894L, 925L, 1021L, 1084L, 1144L, 1302L, > 1544L, 1779L, 1869L, 1925L), midpoint = c(12.5, 62.5, 162, > 232, 416.5, 563, 610.5, 646, 684.5, 730, 819.5, 864, 916, > 1000, 1072, 1136.5, 1295.5, 1532, 1764, 1855.5, 1911)), row.names > c(NA, > -21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_4 > structure(list( > Object = c(2600L, 2604L, 2606L, 2609L, 2611L, 2613L, 2614L, > 2615L, 2617L, 2618L, 2619L, 2620L, 2621L, 2622L, 2626L, 2628L, > 2632L, 2639L, 2648L, 2651L, 2653L), minimum = c(4L, 42L, > 142L, 223L, 393L, 547L, 595L, 641L, 669L, 720L, 809L, 835L, > 908L, 979L, 1059L, 1127L, 1289L, 1519L, 1749L, 1841L, 1897L > ), maximum = c(22L, 85L, 172L, 241L, 421L, 587L, 629L, 655L, > 701L, 738L, 826L, 890L, 925L, 1019L, 1084L, 1143L, 1301L, > 1544L, 1780L, 1868L, 1925L), midpoint = c(13, 63.5, 157, > 232, 407, 567, 612, 648, 685, 729, 817.5, 862.5, 916.5, 999, > 1071.5, 1135, 1295, 1531.5, 1764.5, 1854.5, 1911)), row.names = c(NA, > -21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_5 > structure(list( > Object = c(2580L, 2581L, 2585L, 2586L, 2589L, 2590L, 2592L, > 2593L, 2594L, 2595L, 2596L, 2597L, 2598L, 2599L, 2601L, 2603L, > 2605L, 2610L, 2616L, 2625L, 2629L, 2630L, 2634L), minimum = c(3L, > 43L, 132L, 223L, 391L, 547L, 643L, 669L, 721L, 808L, 836L, > 861L, 908L, 978L, 1028L, 1057L, 1127L, 1288L, 1519L, 1748L, > 1842L, 1893L, 2148L), maximum = c(21L, 85L, 168L, 241L, 419L, > 629L, 656L, 701L, 738L, 828L, 858L, 890L, 925L, 1013L, 1050L, > 1083L, 1143L, 1301L, 1544L, 1780L, 1869L, 1925L, 2148L), > midpoint = c(12, 64, 150, 232, 405, 588, 649.5, 685, 729.5, > 818, 847, 875.5, 916.5, 995.5, 1039, 1070, 1135, 1294.5, > 1531.5, 1764, 1855.5, 1909, 2148)), row.names = c(NA, -23L > ), class = c("tbl_df", "tbl", "data.frame")), position_tab_6 > structure(list( > Object = c(2555L, 2559L, 2562L, 2563L, 
2564L, 2567L, 2569L, > 2570L, 2571L, 2572L, 2573L, 2574L, 2575L, 2576L, 2577L, 2579L, > 2583L, 2587L, 2591L, 2602L, 2607L, 2608L, 2612L), minimum = c(4L, > 45L, 123L, 154L, 224L, 390L, 546L, 600L, 643L, 669L, 720L, > 804L, 836L, 908L, 967L, 1058L, 1129L, 1289L, 1519L, 1748L, > 1843L, 1893L, 2147L), maximum = c(23L, 86L, 150L, 171L, 241L, > 419L, 589L, 636L, 657L, 701L, 738L, 827L, 879L, 925L, 1011L, > 1084L, 1144L, 1301L, 1543L, 1780L, 1871L, 1924L, 2148L), > midpoint = c(13.5, 65.5, 136.5, 162.5, 232.5, 404.5, 567.5, > 618, 650, 685, 729, 815.5, 857.5, 916.5, 989, 1071, 1136.5, > 1295, 1531, 1764, 1857, 1908.5, 2147.5)), row.names = c(NA, > -23L), class = c("tbl_df", "tbl", "data.frame")), position_tab_7 > structure(list( > Object = c(2537L, 2539L, 2540L, 2541L, 2542L, 2544L, 2546L, > 2547L, 2548L, 2549L, 2550L, 2551L, 2552L, 2554L, 2556L, 2558L, > 2560L, 2565L, 2568L, 2578L, 2582L, 2584L, 2588L), minimum = c(3L, > 45L, 122L, 156L, 224L, 387L, 546L, 601L, 669L, 719L, 803L, > 837L, 908L, 959L, 1059L, 1096L, 1128L, 1289L, 1519L, 1748L, > 1844L, 1892L, 2147L), maximum = c(22L, 86L, 147L, 172L, 241L, > 415L, 590L, 656L, 699L, 738L, 830L, 871L, 924L, 1014L, 1082L, > 1119L, 1144L, 1301L, 1543L, 1781L, 1872L, 1925L, 2148L), > midpoint = c(12.5, 65.5, 134.5, 164, 232.5, 401, 568, 628.5, > 684, 728.5, 816.5, 854, 916, 986.5, 1070.5, 1107.5, 1136, > 1295, 1531, 1764.5, 1858, 1908.5, 2147.5)), row.names = c(NA, > -23L), class = c("tbl_df", "tbl", "data.frame")), position_tab_8 > structure(list( > Object = c(2514L, 2517L, 2519L, 2520L, 2521L, 2523L, 2525L, > 2526L, 2527L, 2528L, 2529L, 2530L, 2531L, 2532L, 2533L, 2534L, > 2536L, 2543L, 2545L, 2553L, 2557L, 2561L, 2566L), minimum = c(5L, > 44L, 121L, 153L, 224L, 380L, 546L, 603L, 668L, 721L, 802L, > 841L, 907L, 960L, 1006L, 1060L, 1106L, 1288L, 1518L, 1748L, > 1843L, 1893L, 2148L), maximum = c(23L, 86L, 146L, 170L, 242L, > 409L, 588L, 655L, 699L, 738L, 830L, 872L, 924L, 994L, 1029L, > 1084L, 1143L, 1302L, 1543L, 1781L, 1870L, 
1925L, 2148L), > midpoint = c(14, 65, 133.5, 161.5, 233, 394.5, 567, 629, > 683.5, 729.5, 816, 856.5, 915.5, 977, 1017.5, 1072, 1124.5, > 1295, 1530.5, 1764.5, 1856.5, 1909, 2148)), row.names = c(NA, > -23L), class = c("tbl_df", "tbl", "data.frame")), position_tab_9 > structure(list( > Object = c(2492L, 2493L, 2497L, 2498L, 2499L, 2501L, 2503L, > 2504L, 2505L, 2506L, 2507L, 2508L, 2509L, 2510L, 2511L, 2513L, > 2516L, 2522L, 2524L, 2532L, 2535L, 2538L), minimum = c(6L, > 44L, 111L, 149L, 224L, 375L, 548L, 596L, 668L, 722L, 800L, > 840L, 908L, 960L, 1005L, 1058L, 1127L, 1289L, 1519L, 1748L, > 1842L, 1891L), maximum = c(24L, 81L, 137L, 167L, 242L, 403L, > 589L, 656L, 699L, 738L, 828L, 872L, 925L, 994L, 1028L, 1081L, > 1149L, 1302L, 1544L, 1780L, 1868L, 1924L), midpoint = c(15, > 62.5, 124, 158, 233, 389, 568.5, 626, 683.5, 730, 814, 856, > 916.5, 977, 1016.5, 1069.5, 1138, 1295.5, 1531.5, 1764, 1855, > 1907.5)), row.names = c(NA, -22L), class = c("tbl_df", "tbl", > "data.frame")), position_tab_10 = structure(list(Object = c(2469L, > 2471L, 2474L, 2475L, 2476L, 2478L, 2481L, 2482L, 2483L, 2484L, > 2485L, 2486L, 2487L, 2488L, 2489L, 2491L, 2495L, 2500L, 2502L, > 2512L, 2515L, 2518L), minimum = c(6L, 38L, 109L, 147L, 223L, > 363L, 548L, 597L, 668L, 719L, 803L, 839L, 908L, 958L, 1004L, > 1058L, 1126L, 1288L, 1519L, 1746L, 1841L, 1892L), maximum = c(24L, > 76L, 134L, 165L, 240L, 394L, 591L, 656L, 698L, 737L, 829L, 869L, > 924L, 996L, 1027L, 1081L, 1147L, 1301L, 1543L, 1781L, 1868L, > 1925L), midpoint = c(15, 57, 121.5, 156, 231.5, 378.5, 569.5, > 626.5, 683, 728, 816, 854, 916, 977, 1015.5, 1069.5, 1136.5, > 1294.5, 1531, 1763.5, 1854.5, 1908.5)), row.names = c(NA, -22L > ), class = c("tbl_df", "tbl", "data.frame"))) > > *What is produced when running the base code without any loops:* > > This is the base code without me trying to loop it in anyway, below is what > it produces when its used with dataframe 4 and 5: > > #the code: > list_df$position_tab_5$ID <- 
unlist(lapply(list_df$position_tab_5$midpoint, > function(x) ifelse(any(abs(x - list_df$position_tab_4$midpoint) <= 1),1,0))) > > ##position_tab_5 after manipulations have occured: > structure(list(Object = c(2580L, 2581L, 2585L, 2586L, 2589L, > 2590L, 2592L, 2593L, 2594L, 2595L, 2596L, 2597L, 2598L, 2599L, > 2601L, 2603L, 2605L, 2610L, 2616L, 2625L, 2629L, 2630L, 2634L > ), minimum = c(3L, 43L, 132L, 223L, 391L, 547L, 643L, 669L, 721L, > 808L, 836L, 861L, 908L, 978L, 1028L, 1057L, 1127L, 1288L, 1519L, > 1748L, 1842L, 1893L, 2148L), maximum = c(21L, 85L, 168L, 241L, > 419L, 629L, 656L, 701L, 738L, 828L, 858L, 890L, 925L, 1013L, > 1050L, 1083L, 1143L, 1301L, 1544L, 1780L, 1869L, 1925L, 2148L > ), midpoint = c(12, 64, 150, 232, 405, 588, 649.5, 685, 729.5, > 818, 847, 875.5, 916.5, 995.5, 1039, 1070, 1135, 1294.5, 1531.5, > 1764, 1855.5, 1909, 2148), ID = c(1, 1, 0, 1, 0, 0, 0, 1, 1, > 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0)), row.names = c(NA, > -23L), class = c("tbl_df", "tbl", "data.frame")) > > #position_tab_4 (the DF pos_tab_5 is being compared to) > structure(list(Object = c(2600L, 2604L, 2606L, 2609L, 2611L, > 2613L, 2614L, 2615L, 2617L, 2618L, 2619L, 2620L, 2621L, 2622L, > 2626L, 2628L, 2632L, 2639L, 2648L, 2651L, 2653L), minimum = c(4L, > 42L, 142L, 223L, 393L, 547L, 595L, 641L, 669L, 720L, 809L, 835L, > 908L, 979L, 1059L, 1127L, 1289L, 1519L, 1749L, 1841L, 1897L), > maximum = c(22L, 85L, 172L, 241L, 421L, 587L, 629L, 655L, > 701L, 738L, 826L, 890L, 925L, 1019L, 1084L, 1143L, 1301L, > 1544L, 1780L, 1868L, 1925L), midpoint = c(13, 63.5, 157, > 232, 407, 567, 612, 648, 685, 729, 817.5, 862.5, 916.5, 999, > 1071.5, 1135, 1295, 1531.5, 1764.5, 1854.5, 1911)), row.names = c(NA, > -21L), class = c("tbl_df", "tbl", "data.frame")) > > *Appreciate any help, anyone can provide!* > > [[alternative HTML version deleted]] > > ______________________________________________ > R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see > 
https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code.
Kathan Desai
2020-Jul-04 13:33 UTC
[R] Fwd: Help with looping a function over a list of dataframes:
---------- Forwarded message --------- From: Kathan Desai <kdesai1 at sheffield.ac.uk> Date: Sat, 4 Jul 2020 at 14:31 Subject: Re: [R] Help with looping a function over a list of dataframes: To: Jim Lemon <drjimlemon at gmail.com> Hi Jim, Thankyou for your reply, I tried the function you suggested and it doesn't seem to work. There are again no error messages produced, however the transformation to each position_tab_n table isn't being applied. Cheers, Kathan On Sat, 4 Jul 2020 at 11:14, Jim Lemon <drjimlemon at gmail.com> wrote:> Hi Kathan, > This is a very lazy answer as I haven't tested it. I think you will > need to wrap your loop in a function and return the modified list_df > to assign it like this: > > add_IDs<-function(xdf) { > for(i in seq_along(xdf)) { > xdf$position_tab_[[i]]$ID <- > unlist(lapply(xdf$position_tab_[[i]]$midpoint, function(x) > ifelse(any(abs(x - xdf$position_tab_[[i-1]]$midpoint)<= 1),1,0))) > } > return(xdf) > } > list_df<-add_IDs(list_df) > > Jim > > On Sat, Jul 4, 2020 at 4:48 PM Kathan Desai <kdesai1 at sheffield.ac.uk> > wrote: > > > > I have been trying to run a forloop for a function that compares > dataframe n > > with dataframe n-1, across a list of dataframes. It does this by checking > > each midpoint of dataframe n with each midpoint of dataframe n-1. This is > > done to make up for an disparity in row length. The idea of this code is > to > > identify any objects that are stationary, and assign them an id of 1, and > > the dynamic objects are assigned an id of 0 (examples can be found > below). 
> > > > > > *This is what i have so far:* > > for(i in seq_along(list_df)){ > > list_df$position_tab_[[i]]$ID <- > > unlist(lapply(list_df$position_tab_[[i]]$midpoint, function(x) > > ifelse(any(abs(x - > list_df$position_tab_[[i-1]]$midpoint) > > <= 1),1,0) > > )) > > } > > > > There is no error message being produced so theres nothing to debug, i am > > quite new to R programming in general so excuse any silly mistakes i may > > have made. The function doesnt seem to be adding the ID columns and > > comparing the data as it should. > > > > my list of dataframes contain dataframes named: position_tab_1, > > position_tab_2 .... position_tab_121. Each position_tab represents a > > timepoints, so in total there are 121 timepoints (frames). I need the > loop > > to run so that pos_tab_2 compares to pos_tab_1 and this continues all the > > way to pos_tab_121 comparing to pos_tab_120. > > > > The function adds a column named "id" to each of these dataframes as it > > compares to the dataframe before it, so all dataframes apart from > > position_tab_1 (as it has nothing to compare to) should have this added. 
> > > > > > *Some of my data (first 10 dataframes in list):* > > > dput(list_df[1:10]) > > list(position_tab_1 = structure(list(Object = c(2666L, 2668L, > > 2671L, 2674L, 2676L, 2677L, 2678L, 2679L, 2680L, 2682L, 2683L, > > 2684L, 2685L, 2686L, 2687L, 2689L, 2692L, 2693L, 2694L, 2695L, > > 2696L), minimum = c(4L, 39L, 147L, 224L, 419L, 531L, 595L, 641L, > > 669L, 723L, 810L, 836L, 907L, 978L, 1061L, 1129L, 1290L, 1519L, > > 1749L, 1843L, 1897L), maximum = c(22L, 85L, 173L, 242L, 449L, > > 587L, 627L, 655L, 702L, 740L, 828L, 890L, 923L, 1024L, 1086L, > > 1144L, 1302L, 1544L, 1780L, 1870L, 1925L), midpoint = c(13, 62, > > 160, 233, 434, 559, 611, 648, 685.5, 731.5, 819, 863, 915, 1001, > > 1073.5, 1136.5, 1296, 1531.5, 1764.5, 1856.5, 1911)), row.names = c(NA, > > -21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_2 > > structure(list( > > Object = c(2645L, 2646L, 2650L, 2652L, 2655L, 2656L, 2657L, > > 2658L, 2659L, 2661L, 2662L, 2663L, 2664L, 2665L, 2667L, 2670L, > > 2675L, 2681L, 2688L, 2690L, 2691L), minimum = c(4L, 40L, > > 147L, 224L, 415L, 532L, 595L, 641L, 670L, 722L, 811L, 835L, > > 907L, 978L, 1061L, 1128L, 1289L, 1520L, 1748L, 1843L, 1897L > > ), maximum = c(22L, 85L, 173L, 242L, 445L, 588L, 627L, 655L, > > 702L, 739L, 828L, 891L, 923L, 1022L, 1085L, 1143L, 1302L, > > 1544L, 1779L, 1870L, 1925L), midpoint = c(13, 62.5, 160, > > 233, 430, 560, 611, 648, 686, 730.5, 819.5, 863, 915, 1000, > > 1073, 1135.5, 1295.5, 1532, 1763.5, 1856.5, 1911)), row.names = c(NA, > > -21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_3 > > structure(list( > > Object = c(2623L, 2624L, 2627L, 2631L, 2633L, 2635L, 2636L, > > 2637L, 2638L, 2640L, 2641L, 2642L, 2643L, 2644L, 2647L, 2649L, > > 2654L, 2660L, 2669L, 2672L, 2673L), minimum = c(3L, 39L, > > 149L, 223L, 402L, 539L, 594L, 639L, 669L, 722L, 811L, 834L, > > 907L, 979L, 1060L, 1129L, 1289L, 1520L, 1749L, 1842L, 1897L > > ), maximum = c(22L, 86L, 175L, 241L, 431L, 587L, 627L, 653L, > > 700L, 738L, 828L, 
894L, 925L, 1021L, 1084L, 1144L, 1302L, > > 1544L, 1779L, 1869L, 1925L), midpoint = c(12.5, 62.5, 162, > > 232, 416.5, 563, 610.5, 646, 684.5, 730, 819.5, 864, 916, > > 1000, 1072, 1136.5, 1295.5, 1532, 1764, 1855.5, 1911)), row.names > > c(NA, > > -21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_4 > > structure(list( > > Object = c(2600L, 2604L, 2606L, 2609L, 2611L, 2613L, 2614L, > > 2615L, 2617L, 2618L, 2619L, 2620L, 2621L, 2622L, 2626L, 2628L, > > 2632L, 2639L, 2648L, 2651L, 2653L), minimum = c(4L, 42L, > > 142L, 223L, 393L, 547L, 595L, 641L, 669L, 720L, 809L, 835L, > > 908L, 979L, 1059L, 1127L, 1289L, 1519L, 1749L, 1841L, 1897L > > ), maximum = c(22L, 85L, 172L, 241L, 421L, 587L, 629L, 655L, > > 701L, 738L, 826L, 890L, 925L, 1019L, 1084L, 1143L, 1301L, > > 1544L, 1780L, 1868L, 1925L), midpoint = c(13, 63.5, 157, > > 232, 407, 567, 612, 648, 685, 729, 817.5, 862.5, 916.5, 999, > > 1071.5, 1135, 1295, 1531.5, 1764.5, 1854.5, 1911)), row.names = c(NA, > > -21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_5 > > structure(list( > > Object = c(2580L, 2581L, 2585L, 2586L, 2589L, 2590L, 2592L, > > 2593L, 2594L, 2595L, 2596L, 2597L, 2598L, 2599L, 2601L, 2603L, > > 2605L, 2610L, 2616L, 2625L, 2629L, 2630L, 2634L), minimum = c(3L, > > 43L, 132L, 223L, 391L, 547L, 643L, 669L, 721L, 808L, 836L, > > 861L, 908L, 978L, 1028L, 1057L, 1127L, 1288L, 1519L, 1748L, > > 1842L, 1893L, 2148L), maximum = c(21L, 85L, 168L, 241L, 419L, > > 629L, 656L, 701L, 738L, 828L, 858L, 890L, 925L, 1013L, 1050L, > > 1083L, 1143L, 1301L, 1544L, 1780L, 1869L, 1925L, 2148L), > > midpoint = c(12, 64, 150, 232, 405, 588, 649.5, 685, 729.5, > > 818, 847, 875.5, 916.5, 995.5, 1039, 1070, 1135, 1294.5, > > 1531.5, 1764, 1855.5, 1909, 2148)), row.names = c(NA, -23L > > ), class = c("tbl_df", "tbl", "data.frame")), position_tab_6 > > structure(list( > > Object = c(2555L, 2559L, 2562L, 2563L, 2564L, 2567L, 2569L, > > 2570L, 2571L, 2572L, 2573L, 2574L, 2575L, 2576L, 2577L, 2579L, > > 
2583L, 2587L, 2591L, 2602L, 2607L, 2608L, 2612L), minimum = c(4L, > > 45L, 123L, 154L, 224L, 390L, 546L, 600L, 643L, 669L, 720L, > > 804L, 836L, 908L, 967L, 1058L, 1129L, 1289L, 1519L, 1748L, > > 1843L, 1893L, 2147L), maximum = c(23L, 86L, 150L, 171L, 241L, > > 419L, 589L, 636L, 657L, 701L, 738L, 827L, 879L, 925L, 1011L, > > 1084L, 1144L, 1301L, 1543L, 1780L, 1871L, 1924L, 2148L), > > midpoint = c(13.5, 65.5, 136.5, 162.5, 232.5, 404.5, 567.5, > > 618, 650, 685, 729, 815.5, 857.5, 916.5, 989, 1071, 1136.5, > > 1295, 1531, 1764, 1857, 1908.5, 2147.5)), row.names = c(NA, > > -23L), class = c("tbl_df", "tbl", "data.frame")), position_tab_7 > > structure(list( > > Object = c(2537L, 2539L, 2540L, 2541L, 2542L, 2544L, 2546L, > > 2547L, 2548L, 2549L, 2550L, 2551L, 2552L, 2554L, 2556L, 2558L, > > 2560L, 2565L, 2568L, 2578L, 2582L, 2584L, 2588L), minimum = c(3L, > > 45L, 122L, 156L, 224L, 387L, 546L, 601L, 669L, 719L, 803L, > > 837L, 908L, 959L, 1059L, 1096L, 1128L, 1289L, 1519L, 1748L, > > 1844L, 1892L, 2147L), maximum = c(22L, 86L, 147L, 172L, 241L, > > 415L, 590L, 656L, 699L, 738L, 830L, 871L, 924L, 1014L, 1082L, > > 1119L, 1144L, 1301L, 1543L, 1781L, 1872L, 1925L, 2148L), > > midpoint = c(12.5, 65.5, 134.5, 164, 232.5, 401, 568, 628.5, > > 684, 728.5, 816.5, 854, 916, 986.5, 1070.5, 1107.5, 1136, > > 1295, 1531, 1764.5, 1858, 1908.5, 2147.5)), row.names = c(NA, > > -23L), class = c("tbl_df", "tbl", "data.frame")), position_tab_8 > > structure(list( > > Object = c(2514L, 2517L, 2519L, 2520L, 2521L, 2523L, 2525L, > > 2526L, 2527L, 2528L, 2529L, 2530L, 2531L, 2532L, 2533L, 2534L, > > 2536L, 2543L, 2545L, 2553L, 2557L, 2561L, 2566L), minimum = c(5L, > > 44L, 121L, 153L, 224L, 380L, 546L, 603L, 668L, 721L, 802L, > > 841L, 907L, 960L, 1006L, 1060L, 1106L, 1288L, 1518L, 1748L, > > 1843L, 1893L, 2148L), maximum = c(23L, 86L, 146L, 170L, 242L, > > 409L, 588L, 655L, 699L, 738L, 830L, 872L, 924L, 994L, 1029L, > > 1084L, 1143L, 1302L, 1543L, 1781L, 1870L, 1925L, 2148L), > > 
midpoint = c(14, 65, 133.5, 161.5, 233, 394.5, 567, 629, > > 683.5, 729.5, 816, 856.5, 915.5, 977, 1017.5, 1072, 1124.5, > > 1295, 1530.5, 1764.5, 1856.5, 1909, 2148)), row.names = c(NA, > > -23L), class = c("tbl_df", "tbl", "data.frame")), position_tab_9 > > structure(list( > > Object = c(2492L, 2493L, 2497L, 2498L, 2499L, 2501L, 2503L, > > 2504L, 2505L, 2506L, 2507L, 2508L, 2509L, 2510L, 2511L, 2513L, > > 2516L, 2522L, 2524L, 2532L, 2535L, 2538L), minimum = c(6L, > > 44L, 111L, 149L, 224L, 375L, 548L, 596L, 668L, 722L, 800L, > > 840L, 908L, 960L, 1005L, 1058L, 1127L, 1289L, 1519L, 1748L, > > 1842L, 1891L), maximum = c(24L, 81L, 137L, 167L, 242L, 403L, > > 589L, 656L, 699L, 738L, 828L, 872L, 925L, 994L, 1028L, 1081L, > > 1149L, 1302L, 1544L, 1780L, 1868L, 1924L), midpoint = c(15, > > 62.5, 124, 158, 233, 389, 568.5, 626, 683.5, 730, 814, 856, > > 916.5, 977, 1016.5, 1069.5, 1138, 1295.5, 1531.5, 1764, 1855, > > 1907.5)), row.names = c(NA, -22L), class = c("tbl_df", "tbl", > > "data.frame")), position_tab_10 = structure(list(Object = c(2469L, > > 2471L, 2474L, 2475L, 2476L, 2478L, 2481L, 2482L, 2483L, 2484L, > > 2485L, 2486L, 2487L, 2488L, 2489L, 2491L, 2495L, 2500L, 2502L, > > 2512L, 2515L, 2518L), minimum = c(6L, 38L, 109L, 147L, 223L, > > 363L, 548L, 597L, 668L, 719L, 803L, 839L, 908L, 958L, 1004L, > > 1058L, 1126L, 1288L, 1519L, 1746L, 1841L, 1892L), maximum = c(24L, > > 76L, 134L, 165L, 240L, 394L, 591L, 656L, 698L, 737L, 829L, 869L, > > 924L, 996L, 1027L, 1081L, 1147L, 1301L, 1543L, 1781L, 1868L, > > 1925L), midpoint = c(15, 57, 121.5, 156, 231.5, 378.5, 569.5, > > 626.5, 683, 728, 816, 854, 916, 977, 1015.5, 1069.5, 1136.5, > > 1294.5, 1531, 1763.5, 1854.5, 1908.5)), row.names = c(NA, -22L > > ), class = c("tbl_df", "tbl", "data.frame"))) > > > > *What is produced when running the base code without any loops:* > > > > This is the base code without me trying to loop it in anyway, below is > what > > it produces when its used with dataframe 4 and 5: > > > > 
#the code: > > list_df$position_tab_5$ID <- > unlist(lapply(list_df$position_tab_5$midpoint, > > function(x) ifelse(any(abs(x - list_df$position_tab_4$midpoint) <> 1),1,0))) > > > > ##position_tab_5 after manipulations have occured: > > structure(list(Object = c(2580L, 2581L, 2585L, 2586L, 2589L, > > 2590L, 2592L, 2593L, 2594L, 2595L, 2596L, 2597L, 2598L, 2599L, > > 2601L, 2603L, 2605L, 2610L, 2616L, 2625L, 2629L, 2630L, 2634L > > ), minimum = c(3L, 43L, 132L, 223L, 391L, 547L, 643L, 669L, 721L, > > 808L, 836L, 861L, 908L, 978L, 1028L, 1057L, 1127L, 1288L, 1519L, > > 1748L, 1842L, 1893L, 2148L), maximum = c(21L, 85L, 168L, 241L, > > 419L, 629L, 656L, 701L, 738L, 828L, 858L, 890L, 925L, 1013L, > > 1050L, 1083L, 1143L, 1301L, 1544L, 1780L, 1869L, 1925L, 2148L > > ), midpoint = c(12, 64, 150, 232, 405, 588, 649.5, 685, 729.5, > > 818, 847, 875.5, 916.5, 995.5, 1039, 1070, 1135, 1294.5, 1531.5, > > 1764, 1855.5, 1909, 2148), ID = c(1, 1, 0, 1, 0, 0, 0, 1, 1, > > 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0)), row.names = c(NA, > > -23L), class = c("tbl_df", "tbl", "data.frame")) > > > > #position_tab_4 (the DF pos_tab_5 is being compared to) > > structure(list(Object = c(2600L, 2604L, 2606L, 2609L, 2611L, > > 2613L, 2614L, 2615L, 2617L, 2618L, 2619L, 2620L, 2621L, 2622L, > > 2626L, 2628L, 2632L, 2639L, 2648L, 2651L, 2653L), minimum = c(4L, > > 42L, 142L, 223L, 393L, 547L, 595L, 641L, 669L, 720L, 809L, 835L, > > 908L, 979L, 1059L, 1127L, 1289L, 1519L, 1749L, 1841L, 1897L), > > maximum = c(22L, 85L, 172L, 241L, 421L, 587L, 629L, 655L, > > 701L, 738L, 826L, 890L, 925L, 1019L, 1084L, 1143L, 1301L, > > 1544L, 1780L, 1868L, 1925L), midpoint = c(13, 63.5, 157, > > 232, 407, 567, 612, 648, 685, 729, 817.5, 862.5, 916.5, 999, > > 1071.5, 1135, 1295, 1531.5, 1764.5, 1854.5, 1911)), row.names = c(NA, > > -21L), class = c("tbl_df", "tbl", "data.frame")) > > > > *Appreciate any help, anyone can provide!* > > > > [[alternative HTML version deleted]] > > > > 
______________________________________________ > > R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see > > https://stat.ethz.ch/mailman/listinfo/r-help > > PLEASE do read the posting guide > http://www.R-project.org/posting-guide.html > > and provide commented, minimal, self-contained, reproducible code. >[[alternative HTML version deleted]]
Eric Berger
2020-Jul-04 13:43 UTC
[R] Fwd: Help with looping a function over a list of dataframes:
Hi Kathan, How about trying to create a *minimal* reproducible example, e.g. with a list of two data frames, where each data frame has 5 rows? My guess is that there is a good chance that when you try to create such an example, you will discover the problem yourself. In the event that you create the example but still cannot solve your issue, you will find more people on this list willing to look into your question, as it will be much faster for them to do that (compared to the original formulation). Eric On Sat, Jul 4, 2020 at 4:33 PM Kathan Desai <kdesai1 at sheffield.ac.uk> wrote:> ---------- Forwarded message --------- > From: Kathan Desai <kdesai1 at sheffield.ac.uk> > Date: Sat, 4 Jul 2020 at 14:31 > Subject: Re: [R] Help with looping a function over a list of dataframes: > To: Jim Lemon <drjimlemon at gmail.com> > > > Hi Jim, > > Thankyou for your reply, I tried the function you suggested and it > doesn't seem to work. There are again no error messages produced, however > the transformation to each position_tab_n table isn't being applied. > > Cheers, > Kathan > > > On Sat, 4 Jul 2020 at 11:14, Jim Lemon <drjimlemon at gmail.com> wrote: > > > Hi Kathan, > > This is a very lazy answer as I haven't tested it. I think you will > > need to wrap your loop in a function and return the modified list_df > > to assign it like this: > > > > add_IDs<-function(xdf) { > > for(i in seq_along(xdf)) { > > xdf$position_tab_[[i]]$ID <- > > unlist(lapply(xdf$position_tab_[[i]]$midpoint, function(x) > > ifelse(any(abs(x - xdf$position_tab_[[i-1]]$midpoint)<= 1),1,0))) > > } > > return(xdf) > > } > > list_df<-add_IDs(list_df) > > > > Jim > > > > On Sat, Jul 4, 2020 at 4:48 PM Kathan Desai <kdesai1 at sheffield.ac.uk> > > wrote: > > > > > > I have been trying to run a forloop for a function that compares > > dataframe n > > > with dataframe n-1, across a list of dataframes. It does this by > checking > > > each midpoint of dataframe n with each midpoint of dataframe n-1. 
This > is > > > done to make up for an disparity in row length. The idea of this code > is > > to > > > identify any objects that are stationary, and assign them an id of 1, > and > > > the dynamic objects are assigned an id of 0 (examples can be found > > below). > > > > > > > > > *This is what i have so far:* > > > for(i in seq_along(list_df)){ > > > list_df$position_tab_[[i]]$ID <- > > > unlist(lapply(list_df$position_tab_[[i]]$midpoint, function(x) > > > ifelse(any(abs(x - > > list_df$position_tab_[[i-1]]$midpoint) > > > <= 1),1,0) > > > )) > > > } > > > > > > There is no error message being produced so theres nothing to debug, i > am > > > quite new to R programming in general so excuse any silly mistakes i > may > > > have made. The function doesnt seem to be adding the ID columns and > > > comparing the data as it should. > > > > > > my list of dataframes contain dataframes named: position_tab_1, > > > position_tab_2 .... position_tab_121. Each position_tab represents a > > > timepoints, so in total there are 121 timepoints (frames). I need the > > loop > > > to run so that pos_tab_2 compares to pos_tab_1 and this continues all > the > > > way to pos_tab_121 comparing to pos_tab_120. > > > > > > The function adds a column named "id" to each of these dataframes as it > > > compares to the dataframe before it, so all dataframes apart from > > > position_tab_1 (as it has nothing to compare to) should have this > added. 
> > > > > > > > > *Some of my data (first 10 dataframes in list):* > > > > dput(list_df[1:10]) > > > list(position_tab_1 = structure(list(Object = c(2666L, 2668L, > > > 2671L, 2674L, 2676L, 2677L, 2678L, 2679L, 2680L, 2682L, 2683L, > > > 2684L, 2685L, 2686L, 2687L, 2689L, 2692L, 2693L, 2694L, 2695L, > > > 2696L), minimum = c(4L, 39L, 147L, 224L, 419L, 531L, 595L, 641L, > > > 669L, 723L, 810L, 836L, 907L, 978L, 1061L, 1129L, 1290L, 1519L, > > > 1749L, 1843L, 1897L), maximum = c(22L, 85L, 173L, 242L, 449L, > > > 587L, 627L, 655L, 702L, 740L, 828L, 890L, 923L, 1024L, 1086L, > > > 1144L, 1302L, 1544L, 1780L, 1870L, 1925L), midpoint = c(13, 62, > > > 160, 233, 434, 559, 611, 648, 685.5, 731.5, 819, 863, 915, 1001, > > > 1073.5, 1136.5, 1296, 1531.5, 1764.5, 1856.5, 1911)), row.names = c(NA, > > > -21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_2 > > > structure(list( > > > Object = c(2645L, 2646L, 2650L, 2652L, 2655L, 2656L, 2657L, > > > 2658L, 2659L, 2661L, 2662L, 2663L, 2664L, 2665L, 2667L, 2670L, > > > 2675L, 2681L, 2688L, 2690L, 2691L), minimum = c(4L, 40L, > > > 147L, 224L, 415L, 532L, 595L, 641L, 670L, 722L, 811L, 835L, > > > 907L, 978L, 1061L, 1128L, 1289L, 1520L, 1748L, 1843L, 1897L > > > ), maximum = c(22L, 85L, 173L, 242L, 445L, 588L, 627L, 655L, > > > 702L, 739L, 828L, 891L, 923L, 1022L, 1085L, 1143L, 1302L, > > > 1544L, 1779L, 1870L, 1925L), midpoint = c(13, 62.5, 160, > > > 233, 430, 560, 611, 648, 686, 730.5, 819.5, 863, 915, 1000, > > > 1073, 1135.5, 1295.5, 1532, 1763.5, 1856.5, 1911)), row.names > c(NA, > > > -21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_3 > > > structure(list( > > > Object = c(2623L, 2624L, 2627L, 2631L, 2633L, 2635L, 2636L, > > > 2637L, 2638L, 2640L, 2641L, 2642L, 2643L, 2644L, 2647L, 2649L, > > > 2654L, 2660L, 2669L, 2672L, 2673L), minimum = c(3L, 39L, > > > 149L, 223L, 402L, 539L, 594L, 639L, 669L, 722L, 811L, 834L, > > > 907L, 979L, 1060L, 1129L, 1289L, 1520L, 1749L, 1842L, 1897L > > > ), maximum = 
c(22L, 86L, 175L, 241L, 431L, 587L, 627L, 653L, > > > 700L, 738L, 828L, 894L, 925L, 1021L, 1084L, 1144L, 1302L, > > > 1544L, 1779L, 1869L, 1925L), midpoint = c(12.5, 62.5, 162, > > > 232, 416.5, 563, 610.5, 646, 684.5, 730, 819.5, 864, 916, > > > 1000, 1072, 1136.5, 1295.5, 1532, 1764, 1855.5, 1911)), row.names > > > c(NA, > > > -21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_4 > > > structure(list( > > > Object = c(2600L, 2604L, 2606L, 2609L, 2611L, 2613L, 2614L, > > > 2615L, 2617L, 2618L, 2619L, 2620L, 2621L, 2622L, 2626L, 2628L, > > > 2632L, 2639L, 2648L, 2651L, 2653L), minimum = c(4L, 42L, > > > 142L, 223L, 393L, 547L, 595L, 641L, 669L, 720L, 809L, 835L, > > > 908L, 979L, 1059L, 1127L, 1289L, 1519L, 1749L, 1841L, 1897L > > > ), maximum = c(22L, 85L, 172L, 241L, 421L, 587L, 629L, 655L, > > > 701L, 738L, 826L, 890L, 925L, 1019L, 1084L, 1143L, 1301L, > > > 1544L, 1780L, 1868L, 1925L), midpoint = c(13, 63.5, 157, > > > 232, 407, 567, 612, 648, 685, 729, 817.5, 862.5, 916.5, 999, > > > 1071.5, 1135, 1295, 1531.5, 1764.5, 1854.5, 1911)), row.names > c(NA, > > > -21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_5 > > > structure(list( > > > Object = c(2580L, 2581L, 2585L, 2586L, 2589L, 2590L, 2592L, > > > 2593L, 2594L, 2595L, 2596L, 2597L, 2598L, 2599L, 2601L, 2603L, > > > 2605L, 2610L, 2616L, 2625L, 2629L, 2630L, 2634L), minimum = c(3L, > > > 43L, 132L, 223L, 391L, 547L, 643L, 669L, 721L, 808L, 836L, > > > 861L, 908L, 978L, 1028L, 1057L, 1127L, 1288L, 1519L, 1748L, > > > 1842L, 1893L, 2148L), maximum = c(21L, 85L, 168L, 241L, 419L, > > > 629L, 656L, 701L, 738L, 828L, 858L, 890L, 925L, 1013L, 1050L, > > > 1083L, 1143L, 1301L, 1544L, 1780L, 1869L, 1925L, 2148L), > > > midpoint = c(12, 64, 150, 232, 405, 588, 649.5, 685, 729.5, > > > 818, 847, 875.5, 916.5, 995.5, 1039, 1070, 1135, 1294.5, > > > 1531.5, 1764, 1855.5, 1909, 2148)), row.names = c(NA, -23L > > > ), class = c("tbl_df", "tbl", "data.frame")), position_tab_6 > > > structure(list( > 
> > Object = c(2555L, 2559L, 2562L, 2563L, 2564L, 2567L, 2569L, > > > 2570L, 2571L, 2572L, 2573L, 2574L, 2575L, 2576L, 2577L, 2579L, > > > 2583L, 2587L, 2591L, 2602L, 2607L, 2608L, 2612L), minimum = c(4L, > > > 45L, 123L, 154L, 224L, 390L, 546L, 600L, 643L, 669L, 720L, > > > 804L, 836L, 908L, 967L, 1058L, 1129L, 1289L, 1519L, 1748L, > > > 1843L, 1893L, 2147L), maximum = c(23L, 86L, 150L, 171L, 241L, > > > 419L, 589L, 636L, 657L, 701L, 738L, 827L, 879L, 925L, 1011L, > > > 1084L, 1144L, 1301L, 1543L, 1780L, 1871L, 1924L, 2148L), > > > midpoint = c(13.5, 65.5, 136.5, 162.5, 232.5, 404.5, 567.5, > > > 618, 650, 685, 729, 815.5, 857.5, 916.5, 989, 1071, 1136.5, > > > 1295, 1531, 1764, 1857, 1908.5, 2147.5)), row.names = c(NA, > > > -23L), class = c("tbl_df", "tbl", "data.frame")), position_tab_7 > > > structure(list( > > > Object = c(2537L, 2539L, 2540L, 2541L, 2542L, 2544L, 2546L, > > > 2547L, 2548L, 2549L, 2550L, 2551L, 2552L, 2554L, 2556L, 2558L, > > > 2560L, 2565L, 2568L, 2578L, 2582L, 2584L, 2588L), minimum = c(3L, > > > 45L, 122L, 156L, 224L, 387L, 546L, 601L, 669L, 719L, 803L, > > > 837L, 908L, 959L, 1059L, 1096L, 1128L, 1289L, 1519L, 1748L, > > > 1844L, 1892L, 2147L), maximum = c(22L, 86L, 147L, 172L, 241L, > > > 415L, 590L, 656L, 699L, 738L, 830L, 871L, 924L, 1014L, 1082L, > > > 1119L, 1144L, 1301L, 1543L, 1781L, 1872L, 1925L, 2148L), > > > midpoint = c(12.5, 65.5, 134.5, 164, 232.5, 401, 568, 628.5, > > > 684, 728.5, 816.5, 854, 916, 986.5, 1070.5, 1107.5, 1136, > > > 1295, 1531, 1764.5, 1858, 1908.5, 2147.5)), row.names = c(NA, > > > -23L), class = c("tbl_df", "tbl", "data.frame")), position_tab_8 > > > structure(list( > > > Object = c(2514L, 2517L, 2519L, 2520L, 2521L, 2523L, 2525L, > > > 2526L, 2527L, 2528L, 2529L, 2530L, 2531L, 2532L, 2533L, 2534L, > > > 2536L, 2543L, 2545L, 2553L, 2557L, 2561L, 2566L), minimum = c(5L, > > > 44L, 121L, 153L, 224L, 380L, 546L, 603L, 668L, 721L, 802L, > > > 841L, 907L, 960L, 1006L, 1060L, 1106L, 1288L, 1518L, 1748L, > > > 
1843L, 1893L, 2148L), maximum = c(23L, 86L, 146L, 170L, 242L, > > > 409L, 588L, 655L, 699L, 738L, 830L, 872L, 924L, 994L, 1029L, > > > 1084L, 1143L, 1302L, 1543L, 1781L, 1870L, 1925L, 2148L), > > > midpoint = c(14, 65, 133.5, 161.5, 233, 394.5, 567, 629, > > > 683.5, 729.5, 816, 856.5, 915.5, 977, 1017.5, 1072, 1124.5, > > > 1295, 1530.5, 1764.5, 1856.5, 1909, 2148)), row.names = c(NA, > > > -23L), class = c("tbl_df", "tbl", "data.frame")), position_tab_9 > > > structure(list( > > > Object = c(2492L, 2493L, 2497L, 2498L, 2499L, 2501L, 2503L, > > > 2504L, 2505L, 2506L, 2507L, 2508L, 2509L, 2510L, 2511L, 2513L, > > > 2516L, 2522L, 2524L, 2532L, 2535L, 2538L), minimum = c(6L, > > > 44L, 111L, 149L, 224L, 375L, 548L, 596L, 668L, 722L, 800L, > > > 840L, 908L, 960L, 1005L, 1058L, 1127L, 1289L, 1519L, 1748L, > > > 1842L, 1891L), maximum = c(24L, 81L, 137L, 167L, 242L, 403L, > > > 589L, 656L, 699L, 738L, 828L, 872L, 925L, 994L, 1028L, 1081L, > > > 1149L, 1302L, 1544L, 1780L, 1868L, 1924L), midpoint = c(15, > > > 62.5, 124, 158, 233, 389, 568.5, 626, 683.5, 730, 814, 856, > > > 916.5, 977, 1016.5, 1069.5, 1138, 1295.5, 1531.5, 1764, 1855, > > > 1907.5)), row.names = c(NA, -22L), class = c("tbl_df", "tbl", > > > "data.frame")), position_tab_10 = structure(list(Object = c(2469L, > > > 2471L, 2474L, 2475L, 2476L, 2478L, 2481L, 2482L, 2483L, 2484L, > > > 2485L, 2486L, 2487L, 2488L, 2489L, 2491L, 2495L, 2500L, 2502L, > > > 2512L, 2515L, 2518L), minimum = c(6L, 38L, 109L, 147L, 223L, > > > 363L, 548L, 597L, 668L, 719L, 803L, 839L, 908L, 958L, 1004L, > > > 1058L, 1126L, 1288L, 1519L, 1746L, 1841L, 1892L), maximum = c(24L, > > > 76L, 134L, 165L, 240L, 394L, 591L, 656L, 698L, 737L, 829L, 869L, > > > 924L, 996L, 1027L, 1081L, 1147L, 1301L, 1543L, 1781L, 1868L, > > > 1925L), midpoint = c(15, 57, 121.5, 156, 231.5, 378.5, 569.5, > > > 626.5, 683, 728, 816, 854, 916, 977, 1015.5, 1069.5, 1136.5, > > > 1294.5, 1531, 1763.5, 1854.5, 1908.5)), row.names = c(NA, -22L > > > ), class = 
c("tbl_df", "tbl", "data.frame"))) > > > > > > *What is produced when running the base code without any loops:* > > > > > > This is the base code without me trying to loop it in anyway, below is > > what > > > it produces when its used with dataframe 4 and 5: > > > > > > #the code: > > > list_df$position_tab_5$ID <- > > unlist(lapply(list_df$position_tab_5$midpoint, > > > function(x) ifelse(any(abs(x - list_df$position_tab_4$midpoint) <> > 1),1,0))) > > > > > > ##position_tab_5 after manipulations have occured: > > > structure(list(Object = c(2580L, 2581L, 2585L, 2586L, 2589L, > > > 2590L, 2592L, 2593L, 2594L, 2595L, 2596L, 2597L, 2598L, 2599L, > > > 2601L, 2603L, 2605L, 2610L, 2616L, 2625L, 2629L, 2630L, 2634L > > > ), minimum = c(3L, 43L, 132L, 223L, 391L, 547L, 643L, 669L, 721L, > > > 808L, 836L, 861L, 908L, 978L, 1028L, 1057L, 1127L, 1288L, 1519L, > > > 1748L, 1842L, 1893L, 2148L), maximum = c(21L, 85L, 168L, 241L, > > > 419L, 629L, 656L, 701L, 738L, 828L, 858L, 890L, 925L, 1013L, > > > 1050L, 1083L, 1143L, 1301L, 1544L, 1780L, 1869L, 1925L, 2148L > > > ), midpoint = c(12, 64, 150, 232, 405, 588, 649.5, 685, 729.5, > > > 818, 847, 875.5, 916.5, 995.5, 1039, 1070, 1135, 1294.5, 1531.5, > > > 1764, 1855.5, 1909, 2148), ID = c(1, 1, 0, 1, 0, 0, 0, 1, 1, > > > 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0)), row.names = c(NA, > > > -23L), class = c("tbl_df", "tbl", "data.frame")) > > > > > > #position_tab_4 (the DF pos_tab_5 is being compared to) > > > structure(list(Object = c(2600L, 2604L, 2606L, 2609L, 2611L, > > > 2613L, 2614L, 2615L, 2617L, 2618L, 2619L, 2620L, 2621L, 2622L, > > > 2626L, 2628L, 2632L, 2639L, 2648L, 2651L, 2653L), minimum = c(4L, > > > 42L, 142L, 223L, 393L, 547L, 595L, 641L, 669L, 720L, 809L, 835L, > > > 908L, 979L, 1059L, 1127L, 1289L, 1519L, 1749L, 1841L, 1897L), > > > maximum = c(22L, 85L, 172L, 241L, 421L, 587L, 629L, 655L, > > > 701L, 738L, 826L, 890L, 925L, 1019L, 1084L, 1143L, 1301L, > > > 1544L, 1780L, 1868L, 1925L), midpoint = c(13, 63.5, 
157, > > > 232, 407, 567, 612, 648, 685, 729, 817.5, 862.5, 916.5, 999, > > > 1071.5, 1135, 1295, 1531.5, 1764.5, 1854.5, 1911)), row.names > c(NA, > > > -21L), class = c("tbl_df", "tbl", "data.frame")) > > > > > > *Appreciate any help, anyone can provide!* > > > > > > [[alternative HTML version deleted]] > > > > > > ______________________________________________ > > > R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see > > > https://stat.ethz.ch/mailman/listinfo/r-help > > > PLEASE do read the posting guide > > http://www.R-project.org/posting-guide.html > > > and provide commented, minimal, self-contained, reproducible code. > > > > [[alternative HTML version deleted]] > > ______________________________________________ > R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide > http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. >[[alternative HTML version deleted]]