# In: Math
# a. Using R, find the mean, median, mode, range, variance and
# standard deviation of the data.
# Data set: the 20 observed prices.
prices <- c(
  2.45, 1.20, 0.85, 1.33, 2.25,
  2.25, 2.09, 2.99, 1.00, 0.88,
  1.42, 2.36, 2.15, 2.85, 1.52,
  1.99, 2.38, 0.85, 2.22, 2.75
)

# Measures of central tendency.
Mean <- mean(prices)
Median <- median(prices)
# Find the mode(s) of a vector: the value(s) that occur most often.
#
# v: an atomic vector (numeric, character, logical or factor).
#
# Returns a vector of the same type as `v` holding every value that attains
# the highest frequency, in order of first appearance. When one value is
# strictly more frequent than all others the result has length 1. For
# empty input a single NA is returned.
findmode <- function(v) {
  if (length(v) == 0L) {
    # Nothing to count; there is no mode.
    return(v[NA_integer_])
  }
  uniqv <- unique(v)
  # match() maps every element to the index of its unique value, and
  # tabulate() counts how often each index occurs.
  counts <- tabulate(match(v, uniqv), nbins = length(uniqv))
  # Keep all values tied for the top count; which.max() would silently
  # report only the first of them and hide multimodal data.
  uniqv[counts == max(counts)]
}
# Most frequent value, as reported by findmode().
Mode <- findmode(prices)

# Smallest and largest observation, stored as a length-2 vector (min, max).
Range <- c(min(prices), max(prices))

# Sample variance and sample standard deviation, rounded to 3 decimal places.
Variance <- round(var(prices), digits = 3)
StandardDeviation <- round(sd(prices), digits = 3)
# The mean, median, mode, range, variance and standard deviation of
# the data
# Print each summary statistic in turn: mean, median, mode, range,
# variance, standard deviation.
invisible(lapply(
  list(Mean, Median, Mode, Range, Variance, StandardDeviation),
  print
))
# b. Using Excel/R, construct a frequency histogram of the data set.
# Use the guidelines in the class notes. Provide all the details
# (interval, width, etc.) on how you constructed the histogram. Make sure
# that you attach the histogram created by Excel/R. Comment on the shape
# of the frequency distribution (e.g., is the distribution skewed? Is the
# distribution approximately mound-shaped and symmetric?) for the data set
# based on your histogram.
# Count the number of data points.
length(prices)

# Calculate the number of bins by taking the square root of the number
# of data points and rounding up.
bins <- ceiling(sqrt(length(prices)))
bins

# Calculate the bin width by dividing the specification tolerance or
# range (USL - LSL or max - min value) by the number of bins.
# range() returns c(min, max), so diff() is needed to turn it into the
# single number max - min; dividing range(prices) directly would yield
# two values (min / bins and max / bins) rather than a width.
width <- diff(range(prices)) / bins
width

# Use the computed bin count rather than a hard-coded 5. hist() treats a
# single number as a suggestion and picks "pretty" break points near it.
hist(prices, breaks = bins, main = "Histogram of prices")
# The distribution is not mound-shaped; it is skewed.
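# c. Based on the results in part a, construct the intervals for the data
# set (the interval definitions appear to be missing from this text;
# presumably mean +/- 1, 2 and 3 standard deviations -- confirm against
# the assignment). Be sure to show your intervals below. Based on the
# results in part b, what percentage of the measurements for the data set
# falls in each interval?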
# Split the prices into equal-width intervals; the number of intervals comes
# from Sturges' rule. The result of cut() is used directly: wrapping it in
# factor() drops intervals that contain no observations, so they would
# vanish from the frequency table instead of showing a count of 0.
factors <- cut(prices, breaks = nclass.Sturges(prices))

# Tabulate the interval counts and turn them into a data.frame.
out <- as.data.frame(table(factors))

# Add each interval's share of the observations as a percentage
# (prop.table() on its own yields fractions between 0 and 1).
out <- transform(out, percentage = 100 * prop.table(Freq))
out