# Retrieved from
# http://www.unt.edu/rss/class/Jon/R_SC/Module9/CentralLimitTheorem3.R
#
####### Central Limit Theorem simulations #######
#
# According to Howell (2007):
# "The central limit theorem is a factual statement about the distribution of
# means. In an extended form, it states,
#   Given a population with mean 'mu' and variance 'sigma^2', the sampling
#   distribution of the mean (the distribution of sample means) will have a
#   mean equal to 'mu', a variance equal to 'sigma^2'/n, and a standard
#   deviation equal to 'sigma'/sqrt(n). The distribution will approach the
#   normal distribution as n, the sample size, increases" (p. 170).
#
################## Binomial Simulation ##################
#
# As sample size increases, proportion of heads (p.heads) condenses on 0.50;
# with a fair coin (i.e. heads = .50).
#
# Each trial flips the coin one more time than the previous trial, records the
# proportion of heads, and redraws two stacked plots: the proportions against
# the number of flips, and (from the third trial on) a histogram of the
# proportions with a density curve.

coin <- c(1, 0)        # 1 = heads, 0 = tails
heads <- 0.50          # Probability of heads; fair coin: heads <- .50
flips <- 10            # i.e. 'sample size'; incremented before every trial
trials <- 100          # Iterations
speed <- 0.25          # Number of seconds between trials
p.heads <- numeric(0)  # Proportion of heads for each trial (grows per trial)
s.add <- numeric(0)    # Number of flips for each trial (grows per trial)

par(mfrow = c(2, 1))   # Display two graphs in one window (2 rows, 1 column)

for (i in seq_len(trials)) {
  # round() keeps the flip count a whole number even if 'flips' was edited to
  # a non-integer starting value.
  flips <- round(flips + 1)
  outcome <- sample(coin, flips, replace = TRUE, prob = c(heads, 1 - heads))
  p.heads[i] <- sum(outcome) / flips
  s.add[i] <- flips

  plot(s.add, p.heads, ylim = c(0, heads + 0.5))

  # The histogram/density pair is only drawn once three proportions exist.
  if (i > 2) {
    hist(p.heads, main = "Proportion of Heads", xlim = c(0, heads + 0.5),
         col = "lightblue1", freq = FALSE)
    lines(density(p.heads), col = "blue")
  }

  # The pause only paces the on-screen animation; skip it in batch runs,
  # where the plots are written to a file and waiting serves no purpose.
  if (interactive()) {
    Sys.sleep(speed)
  }
}

################## Gaussian (normal distribution) Simulation ##################
#
# As sample size increases, each sample will more closely resemble a normal
# distribution and as the "distribution of sample means" grows (with each
# sample), it will center on the population mean (mu).
#
# The Population of 1000000 individuals with a mean (mu) of 100, standard
# deviation (sigma) of 15.
# Normal population: 1000000 values drawn with mean 100 and sd 15, shown as a
# density-scaled histogram with a kernel density curve on a single panel.
pop <- rnorm(1000000, 100, 15)
par(mfrow = c(1, 1))
hist(pop, col = "lightblue1", xlim = c(40, 160), freq = FALSE)
lines(density(pop), col = "blue")

# Setting the initial conditions.
n <- 4                      # Initial sample size (this will increase by 1
                            # with each loop of the simulation).
draws <- 1000               # The number of samples to draw from the
                            # population (i.e. loops or iterations).
sample.means <- numeric(0)  # Empty vector for the sample means from each
                            # sample drawn during the simulation.
par(mfrow = c(2, 1))        # Display two graphs in one window, 2 rows on
                            # 1 column.

# Simulation.
for (i in seq_len(draws)) {
  n <- n + 1
  # Named 'current.sample' rather than 'sample' so that base::sample() is not
  # shadowed by a data vector left behind in the workspace.
  current.sample <- sample(pop, n, replace = FALSE)
  sample.means[i] <- mean(current.sample)

  hist(current.sample, col = "lightblue1", xlim = c(40, 160), xlab = " ",
       freq = FALSE, main = "Histogram of each Sample")
  lines(density(current.sample), col = "blue")
  hist(sample.means, col = "lightgreen", xlim = c(85, 115), xlab = " ",
       main = "Distribution of Sample Means")

  # The pause only paces the on-screen animation; skip it in batch runs,
  # where the plots are written to a file and waiting serves no purpose.
  if (interactive()) {
    Sys.sleep(0.25)
  }
}

################## Exponential (NON-normal) Distribution ##################
#
# Even with a NON-normally distributed Population; as the "distribution of
# sample means" grows (with each sample), the distribution of sample means
# will become symmetrical and centered on the population mean.
#
# Exponential distribution of the Population of 1000000 individuals with a
# mean (mu) of approximately 20.
pop <- rexp(1000000, 1 / 20)
mean(pop)  # Auto-prints the realised population mean when run at top level.
par(mfrow = c(1, 1))
hist(pop, col = "lightblue1", xlim = c(0, 160), freq = FALSE)
lines(density(pop), col = "blue")

# Setting the initial conditions.
n <- 4                      # Initial sample size (this will increase by 1
                            # with each loop of the simulation).
draws <- 1000               # The number of samples to draw from the
                            # population (i.e. loops or iterations).
sample.means <- numeric(0)  # Empty vector for the sample means from each
                            # sample drawn during the simulation.
par(mfrow = c(2, 1))        # Display two graphs in one window, 2 rows on
                            # 1 column.

# Simulation.
# Exponential simulation loop: on every pass the sample size n grows by one, a
# sample of that size is drawn from 'pop' without replacement, its mean is
# appended to 'sample.means', and both histograms are redrawn.
# Relies on 'pop', 'n', 'draws' and 'sample.means' being set up beforehand.
for (i in seq_len(draws)) {
  n <- n + 1
  # Named 'current.sample' rather than 'sample' so that base::sample() is not
  # shadowed by a data vector left behind in the workspace.
  current.sample <- sample(pop, n, replace = FALSE)
  sample.means[i] <- mean(current.sample)

  hist(current.sample, col = "lightblue1", xlim = c(0, 160), xlab = " ",
       freq = FALSE, main = "Histogram of each Sample")
  lines(density(current.sample), col = "blue")
  hist(sample.means, col = "lightgreen", xlim = c(0, 40), xlab = " ",
       main = "Distribution of Sample Means")

  # The pause only paces the on-screen animation; skip it in batch runs,
  # where the plots are written to a file and waiting serves no purpose.
  if (interactive()) {
    Sys.sleep(0.25)
  }
}

################## Reference ##################
#
# Howell, D. C. (2007). Statistical Methods for Psychology (6th ed.).
# Belmont, CA: Thomson Wadsworth.
#
# Feb. 2011