如何使用R根据数据框中单个列的最小值对特定列中的行进行子集

问题描述:

我们有一个数据框,其中包含数千行和多个列.样本数据框如下所示

we have a data frame that has 1000's of rows with multiple columns. the sample data frame is presented below

# Sample data frame: 20 rows, the first 10 with ID "B_1" and the last 10 with
# ID "B_1_2". Column order is X, Y, ID, TI, X0, V2 (X0 and V2 are the 5th and
# 6th columns).
df1 <- data.frame(
  X = c(
    7.48, 7.82, 8.15, 8.47, 8.80,
    9.20, 9.51, 9.83, 10.13, 10.59,
    7.59, 8.06, 8.39, 8.87, 9.26,
    9.64, 10.09, 10.48, 10.88, 11.45
  ),
  Y = c(
    49.16, 48.78, 48.40, 48.03, 47.65,
    47.24, 46.87, 46.51, 46.15, 45.73,
    48.70, 48.18, 47.72, 47.20, 46.71,
    46.23, 45.72, 45.24, 44.77, 44.23
  ),
  ID = rep(c("B_1", "B_1_2"), each = 10),
  TI = c(
    191.31, 191.35, 191.39, 191.44, 191.48,
    191.52, 191.56, 191.60, 191.64, 191.69,
    1349.93, 1349.97, 1350.01, 1350.05, 1350.09,
    1350.14, 1350.18, 1350.22, 1350.26, 1350.30
  ),
  X0 = c(
    0.172, 0.344, 0.846, 1.335, 1.838,
    2.410, 2.89, 3.37, 3.842, 4.46,
    0.361, 0.983, 1.545, 2.241, 2.86,
    3.47, 4.15, 4.77, 5.388, 6.164
  ),
  V2 = c(
    1.154, 0.644, 0.141, 0.348, 0.851,
    1.423, 1.9059, 2.3875, 2.856, 3.475,
    0.771, 0.224, 0.596, 1.262, 1.883,
    2.493, 3.168, 3.786, 4.402, 5.177
  )
)

在数据帧"df1"中,我们希望分别基于第5列和第6列中的最小值,按ID对第1:4列中的行进行子集化.

in the dataframe 'df1' we would like to subset rows by ID wise in the columns 1:4 based on minimum values in the 5th column and 6th column respectively.

例如,在数据帧"df1"中,对于ID"B_1","X0"列中的最小值为0.172,同一ID在"V2"列中的最小值为0.141.因此,我们希望提取第1行和第3行的第1:4列,以及它们对应的X0和V2值,如下图所示.ID"B_1_2"也同样处理.除了"X0"和"V2"之外,我们的数据集中还有20多个类似的变量.

For instance, in the data frame 'df1', for ID "B_1" the minimum value in the 'X0' column is 0.172 and the minimum value in the 'V2' column is 0.141. So we would like to subset the 1st and 3rd rows of columns 1:4, along with their corresponding X0 and V2 values, as shown in the figure below. Likewise for the ID "B_1_2". Besides 'X0' and 'V2', we have more than 20 such variables in our dataset.

预期输出如下图所示

要获得所需的输出,我尝试了如下所示的代码

to get the desired output I tried the code as presented below

# Attempt from the question, kept exactly as posted; it does not produce the
# desired result.
library(data.table)
# Convert df1 to a data.table so the [i, j, by] syntax below is available.
df1=as.data.table(df1)
# Intended: collect the rows that hold the per-ID minimum of X0.
a <- do.call(rbind,
    # NOTE(review): the row argument `i` is never used, so the same lookup is
    # evaluated once per row of df1 and the identical result is stacked
    # repeatedly by rbind.
    apply(df1,1,function(i){
     # .I[(X0)==min(X0)] gives, within each ID, the row numbers where X0 equals
     # the group minimum; $V1 extracts that unnamed result column.
     # Only X0 is handled here; V2 is not considered.
     df1[df1[,.I[(X0)==min(X0)],by=ID]$V1]
    })
)

上面的代码中有问题.我正在寻找获得所需输出的代码

there are issues in the above code. i am looking for the code to get the desired output

我们可以使用 map 遍历"X0","V2"列,按"ID"分组,切片出该列取最小值( min )的行,将结果绑定在一起( _dfr ),并使用这些列的 pmin 创建"d"列

We can use map to loop over the columns 'X0', 'V2', grouped by 'ID', slice the rows where the value is min for that looped column, bind them together (_dfr) and create the 'd' column with pmin of those columns

library(dplyr)
library(purrr)

# Names of the 5th and 6th columns of df1; each is minimised per ID below.
nm1 <- names(df1)[5:6]

# For every column named in nm1, keep each ID's row(s) where that column is at
# its group minimum, row-bind the per-column results, then add `d` as the
# row-wise minimum across the nm1 columns.
nm1 %>%
  map_dfr(function(col) {
    df1 %>%
      group_by(ID) %>%
      slice_min(.data[[col]])
  }) %>%
  ungroup() %>%
  mutate(d = reduce(select(., all_of(nm1)), pmin))

-输出

# A tibble: 4 x 7
#      X     Y ID       TI    X0    V2     d
#  <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
#1  7.48  49.2 B_1    191. 0.172 1.15  0.172
#2  7.59  48.7 B_1_2 1350. 0.361 0.771 0.361
#3  8.15  48.4 B_1    191. 0.846 0.141 0.141
#4  8.06  48.2 B_1_2 1350. 0.983 0.224 0.224