R数据处理-tidyverse

数据处理流程:加载数据 → 选择变量 → 过滤行 → 处理缺失值 → 创建新变量 → 分组 → 汇总计算

install.packages("tidyverse")
> require(tidyverse)
载入需要的程辑包:tidyverse
-- Attaching packages ------------------------------------ tidyverse 1.3.0 --
√ ggplot2 3.3.2     √ purrr   0.3.4
√ tibble  3.0.1     √ dplyr   1.0.0
√ tidyr   1.1.0     √ stringr 1.4.0
√ readr   1.3.1     √ forcats 0.5.0
-- Conflicts --------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
> view(starwars)
> starwars %>%
+ select(gender,mass,height,species)
# A tibble: 87 x 4
   gender     mass height species
   <chr>     <dbl>  <int> <chr>  
 1 masculine    77    172 Human  
 2 masculine    75    167 Droid  
 3 masculine    32     96 Droid  
 4 masculine   136    202 Human  
 5 feminine     49    150 Human  
 6 masculine   120    178 Human  
 7 feminine     75    165 Human  
 8 masculine    32     97 Droid  
 9 masculine    84    183 Human  
10 masculine    77    182 Human  
# ... with 77 more rows
> starwars %>%
+ select(gender,mass,height,species) %>%
+ filter(species == "Human")
# A tibble: 35 x 4
   gender     mass height species
   <chr>     <dbl>  <int> <chr>  
 1 masculine    77    172 Human  
 2 masculine   136    202 Human  
 3 feminine     49    150 Human  
 4 masculine   120    178 Human  
 5 feminine     75    165 Human  
 6 masculine    84    183 Human  
 7 masculine    77    182 Human  
 8 masculine    84    188 Human  
 9 masculine    NA    180 Human  
10 masculine    80    180 Human  
# ... with 25 more rows


>  starwars %>%
+ select(gender,mass,height,species) %>%
+ filter(species == "Human") %>%
+ na.omit()
# A tibble: 22 x 4
   gender     mass height species
   <chr>     <dbl>  <int> <chr>  
 1 masculine    77    172 Human  
 2 masculine   136    202 Human  
 3 feminine     49    150 Human  
 4 masculine   120    178 Human  
 5 feminine     75    165 Human  
 6 masculine    84    183 Human  
 7 masculine    77    182 Human  
 8 masculine    84    188 Human  
 9 masculine    80    180 Human  
10 masculine    77    170 Human  
# ... with 12 more rows
> starwars %>%
+  select(gender,mass,height,species) %>%
+  filter(species == "Human") %>%
+  na.omit() %>%
+ mutate(height=height/100)
# A tibble: 22 x 4
   gender     mass height species
   <chr>     <dbl>  <dbl> <chr>  
 1 masculine    77   1.72 Human  
 2 masculine   136   2.02 Human  
 3 feminine     49   1.5  Human  
 4 masculine   120   1.78 Human  
 5 feminine     75   1.65 Human  
 6 masculine    84   1.83 Human  
 7 masculine    77   1.82 Human  
 8 masculine    84   1.88 Human  
 9 masculine    80   1.8  Human  
10 masculine    77   1.7  Human  
# ... with 12 more rows
> starwars %>%
+  select(gender,mass,height,species) %>%
+  filter(species == "Human") %>%
+  na.omit() %>%
+ mutate(height=height/100) %>%
+ mutate(BMI=mass/height^2)
# A tibble: 22 x 5
   gender     mass height species   BMI
   <chr>     <dbl>  <dbl> <chr>   <dbl>
 1 masculine    77   1.72 Human    26.0
 2 masculine   136   2.02 Human    33.3
 3 feminine     49   1.5  Human    21.8
 4 masculine   120   1.78 Human    37.9
 5 feminine     75   1.65 Human    27.5
 6 masculine    84   1.83 Human    25.1
 7 masculine    77   1.82 Human    23.2
 8 masculine    84   1.88 Human    23.8
 9 masculine    80   1.8  Human    24.7
10 masculine    77   1.7  Human    26.6
# ... with 12 more rows
> starwars %>%
+  select(gender,mass,height,species) %>%
+  filter(species == "Human") %>%
+  na.omit() %>%
+ mutate(height=height/100) %>%
+ mutate(BMI=mass/height^2) %>%
+ group_by(gender) %>%
+ summarise(Average_BMI=mean(BMI))
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 2 x 2
  gender    Average_BMI
  <chr>           <dbl>
1 feminine         22.0
2 masculine        26.0