In: Statistics and Probability
<< Using R code >>
Set seed number as "12345" every time you generate random
numbers. For each answer,
use # to explain if necessary.
2) Generate a data.frame "D" with 3 variables.
The 1st variable "v1" has 50 numbers from N(5,3^2) (normal with mean
5, standard deviation 3)
The 2nd variable "v2" has 50 numbers from exp(5) (exponential with
parameter 5)
The 3rd variable "v3" has 50 random characters from the lower-case
alphabet.
2-1) Rename the variable from "v1", "v2", "v3" to "normal",
"exponential", "alphabe3t",
print first 6 observations of D.
2-2) Make 3 subgroups of D according to whether "alphabet" belongs to (a~k),
(l~p), (q~z), and name
the data frames "D1", "D2", "D3". Print D1, D2, D3.
2-3) Sort each of the three data frames by ascending order of
alphabet values, and name these
"E1", "E2", "E3". Print E1, E2, E3.
2)
# stri_rand_strings() comes from the stringi package; install it first with
# install.packages("stringi") if library() cannot find it.
library(stringi)

# Fix the seed so that the random draws below are reproducible.
set.seed(12345)
# Data
V1 <- rnorm(50, mean = 5, sd = 3)        # 50 draws from N(5, 3^2)
V2 <- rexp(50, rate = 5)                 # 50 draws from an exponential, rate 5
V3 <- stri_rand_strings(50, 1, "[a-z]")  # 50 strings of one lower-case letter
D <- data.frame(V1, V2, V3)
# print() writes the table to the console in any session; View() would only
# open an interactive viewer window.
print(D)
V1 V2 V3
1 6.75658645 0.174201759 x
2 7.12839805 0.046901466 u
3 4.67209006 0.072277117 v
4 3.63950848 0.054981773 e
5 6.81766237 0.029814905 k
6 -0.45386790 0.043790418 f
7 6.89029565 0.245385144 g
8 4.17144768 0.102108418 z
9 4.14752077 0.242418430 v
10 2.24203399 0.117988825 j
11 4.65125658 0.081506896 y
12 10.45193613 0.874056337 e
13 6.11188359 0.195263091 e
14 6.56064937 0.023782693 t
15 2.74840402 0.005967024 x
16 7.45069952 0.502966267 h
17 2.34092744 0.073300229 i
18 4.00526723 0.083988820 z
19 8.36213796 0.120092622 a
20 5.89617110 0.025874649 h
21 7.33886577 0.109705316 u
22 9.36735525 0.568582478 g
23 3.06701471 0.068767256 l
24 0.34058778 0.125106849 p
25 0.20687145 0.065264496 x
26 10.41529256 0.096676176 d
27 3.55505791 0.373324298 n
28 6.86113940 0.528030102 r
29 6.83637048 0.123103996 t
30 4.51306707 0.008711054 c
31 7.43561954 0.077416271 b
32 11.59050064 0.297638491 o
33 11.14757101 0.068821101 q
34 9.89733692 0.051588192 h
35 5.76281358 0.023617179 s
36 6.47356484 0.118268482 j
37 4.02774026 0.220246832 g
38 0.01384927 0.410903149 y
39 10.30320155 0.113912250 l
40 5.07740315 0.073220500 i
41 8.38553250 0.222076571 n
42 -2.14107418 0.096264206 q
43 1.81920334 0.037960800 e
44 7.81142162 0.052737875 c
45 7.56335516 0.162763358 c
46 9.38218821 0.255281735 c
47 0.76070367 0.026090956 n
48 6.70220976 0.241876557 x
49 6.74956296 0.100306337 s
50 1.07960350 0.081680670 x
2-1)
# Rename the three generated columns to the names requested in 2-1.
# Each step only touches a column whose current name matches exactly, so a
# column that is absent is simply skipped.
names(D) <- replace(names(D), names(D) == "V1", "normal")
names(D) <- replace(names(D), names(D) == "V2", "exponential")
names(D) <- replace(names(D), names(D) == "V3", "alphabe3t")
normal exponential alphabe3t
1 6.75658645 0.174201759 x
2 7.12839805 0.046901466 u
3 4.67209006 0.072277117 v
4 3.63950848 0.054981773 e
5 6.81766237 0.029814905 k
6 -0.45386790 0.043790418 f
7 6.89029565 0.245385144 g
8 4.17144768 0.102108418 z
9 4.14752077 0.242418430 v
10 2.24203399 0.117988825 j
11 4.65125658 0.081506896 y
12 10.45193613 0.874056337 e
13 6.11188359 0.195263091 e
14 6.56064937 0.023782693 t
15 2.74840402 0.005967024 x
16 7.45069952 0.502966267 h
17 2.34092744 0.073300229 i
18 4.00526723 0.083988820 z
19 8.36213796 0.120092622 a
20 5.89617110 0.025874649 h
21 7.33886577 0.109705316 u
22 9.36735525 0.568582478 g
23 3.06701471 0.068767256 l
24 0.34058778 0.125106849 p
25 0.20687145 0.065264496 x
26 10.41529256 0.096676176 d
27 3.55505791 0.373324298 n
28 6.86113940 0.528030102 r
29 6.83637048 0.123103996 t
30 4.51306707 0.008711054 c
31 7.43561954 0.077416271 b
32 11.59050064 0.297638491 o
33 11.14757101 0.068821101 q
34 9.89733692 0.051588192 h
35 5.76281358 0.023617179 s
36 6.47356484 0.118268482 j
37 4.02774026 0.220246832 g
38 0.01384927 0.410903149 y
39 10.30320155 0.113912250 l
40 5.07740315 0.073220500 i
41 8.38553250 0.222076571 n
42 -2.14107418 0.096264206 q
43 1.81920334 0.037960800 e
44 7.81142162 0.052737875 c
45 7.56335516 0.162763358 c
46 9.38218821 0.255281735 c
47 0.76070367 0.026090956 n
48 6.70220976 0.241876557 x
49 6.74956296 0.100306337 s
50 1.07960350 0.081680670 x
# Print the first 6 observations of D. The explicit print() makes the rows
# appear even when the script is run with source(), where a bare top-level
# expression is not auto-printed.
print(head(D, n = 6L))
normal exponential alphabe3t
1 6.7565865 0.17420176 x
2 7.1283981 0.04690147 u
3 4.6720901 0.07227712 v
4 3.6395085 0.05498177 e
5 6.8176624 0.02981491 k
6 -0.4538679 0.04379042 f
2-2)
# three groups
# Base R only (stringr is not needed). The rows of D are indexed, not just
# the letter column, so D1, D2 and D3 are data frames that keep all three
# variables, as the task asks.
D1 <- D[grepl("^[a-k]", D$alphabe3t), , drop = FALSE]  # letters a to k
D2 <- D[grepl("^[l-p]", D$alphabe3t), , drop = FALSE]  # letters l to p
D3 <- D[grepl("^[q-z]", D$alphabe3t), , drop = FALSE]  # letters q to z
# Each print shows the complete rows of one subgroup; with the data listed
# above the groups hold 23, 7 and 20 rows respectively.
print(D1)
print(D2)
print(D3)
2-3)
# Sorting of three groups
# order() gives the row permutation that sorts the letter column and leaves
# tied rows in their original order. method = "radix" compares in the C
# locale, so the result does not depend on the session locale.
E1 <- D1[order(D1$alphabe3t, method = "radix"), , drop = FALSE]
E2 <- D2[order(D2$alphabe3t, method = "radix"), , drop = FALSE]
E3 <- D3[order(D3$alphabe3t, method = "radix"), , drop = FALSE]
print(E1)
print(E2)
print(E3)