Chapter 10: Inferences Concerning Proportions

Example, Page-280

In [1]:
# Large-sample confidence interval for a proportion, computed from x out of n.

# Variable declaration
n = 100             # sample size (total persons)
x = 36              # observed count; the sample proportion below is x/n
alpha = 0.05        # 1 - confidence level (the Z value below is hard-coded for it)

# Calculation
from scipy import *
from pylab import *

Z = 1.96                        # tabulated Z value corresponding to alpha/2
p = float(x) / n                # sample proportion; float() avoids Python 2 integer division

# Half-width of the interval: Z * sqrt(p(1-p)/n)
val = Z * sqrt(p * (1 - p) / n)
lower, upper = p - val, p + val

# Result
# A single-argument print(...) emits the same text under the Python 2 print
# statement and the Python 3 print function.
print(" ".join(["95% confidence interval: (", str(round(lower, 3)), ",", str(round(upper, 3)), ")"]))
95% confidence interval: ( 0.266 , 0.454 )

Example, Page-281

In [2]:
# Maximum error of the estimate of a proportion at the 99% confidence level.

# Variable declaration
n = 400                         # sample size (total persons)
x = 136                         # number of persons answering yes

# Calculation
from scipy import *
from pylab import *

Z = 2.575                                   # tabulated Z value corresponding to alpha/2
p = float(x) / n                            # sample proportion; float() avoids integer division

# Maximum error Z * sqrt(p(1-p)/n), rounded to three decimals in one step
max_err = round(Z * sqrt(p * (1 - p) / n), 3)

# Result
# Single-argument print(...) gives identical text on Python 2 and Python 3.
print(" ".join(["Maximum error with 99% confidence interval: ", str(max_err)]))
Maximum error with 99% confidence interval:  0.061

Example, Page-282

In [3]:
# Sample size needed to estimate a proportion with maximum error E.

# Variable declaration
E = 0.04            # maximum error
alpha = 0.05        # 1 - confidence level (the Z value below is hard-coded for it)
Z = 1.96            # tabulated Z value corresponding to alpha/2

# Calculation
from scipy import *
from pylab import *
# math.ceil is used below. Import math explicitly instead of relying on a
# `math` name leaking out of the star imports above (newer NumPy releases no
# longer expose one, which makes the cell fail with a NameError).
import math

# Part-A: no prior information about p, so use p(1-p) <= 1/4
n1 = (Z / E) ** 2 / 4                    # sample size before rounding up
n1 = int(math.ceil(n1))

# Part-B: the proportion is taken to be p = 0.12
p = 0.12
n2 = (Z / E) ** 2 * p * (1 - p)          # sample size before rounding up
n2 = int(math.ceil(n2))

# Result
# Single-argument print(...) gives identical text on Python 2 and Python 3.
print(" ".join(["Part(A): sample size:", str(n1)]))
print(" ".join(["Part(B): sample size:", str(n2)]))
Part(A): sample size: 601
Part(B): sample size: 254

Example, Page-284

In [4]:
# One-sample, one-sided Z test for a proportion (H0: p = p0 against H1: p > p0).

# Variable declaration
alpha = 0.05      # level of significance
x = 48            # observed count
n = 60            # sample size
p0 = 0.70         # proportion under the null hypothesis

# Calculation
from scipy import *
from pylab import *

# Null hypothesis: p = 0.70; alternative hypothesis: p > 0.70
Z_thr = 1.645                   # theoretical (critical) value of Z

# Test statistic: (x - n*p0) / sqrt(n*p0*(1-p0))
Z_prt = (x - n*p0) / sqrt(n*p0*(1-p0))        # practical (observed) value of Z
Z_prt = round(Z_prt, 3)

# Result
# Single-argument print(...) gives identical text on Python 2 and Python 3.
print(" ".join(["Practical Z value: ", str(Z_prt)]))
# The two conclusions are mutually exclusive: without the `else`, both pairs of
# messages were tied to the "not rejected" condition and nothing was printed
# when the null hypothesis had to be rejected.
if Z_thr > Z_prt:
    print("null hypothesis can not be rejected")
    print("Proportion of good transceivers is not greater than 0.70")
else:
    print("null hypothesis must be rejected")
    print("Proportion of good transceivers is greater than 0.70")
Practical Z value:  1.69
null hypothesis must be rejected
Proportion of good transceivers is greater than 0.70

Example, Page-286

In [5]:
# Chi-square test on a 2x3 table (rows: crumbled / remained intact).

# Variable declaration
alpha = 0.05      # level of significance

# Calculation
from scipy import *
from pylab import *

chi_sq_thr = 5.991                      # theoretical value of chi square at alpha = 0.05 with v = 2

l = array([[41,27,22],[79,53,78]])

crum = l[0,0:3]                         # row of crumbled counts
rem_int = l[1,0:3]                      # row of remained-intact counts

# Float denominator: under Python 2, int / int would truncate the expected
# frequencies whenever row total * column total is not a multiple of the grand total.
grand_total = float(sum(crum) + sum(rem_int))

# Expected frequency of a cell = row total * column total / grand total
e11 = sum(crum)*sum(l[0:2,0]) / grand_total
e12 = sum(crum)*sum(l[0:2,1]) / grand_total
e13 = sum(crum)*sum(l[0:2,2]) / grand_total
e21 = sum(rem_int)*sum(l[0:2,0]) / grand_total
e22 = sum(rem_int)*sum(l[0:2,1]) / grand_total
e23 = sum(rem_int)*sum(l[0:2,2]) / grand_total

q = [e11,e12,e13,e21,e22,e23]                            # list of expected frequencies
p = [41,27,22,79,53,78]                                  # list of observed entries (same order as q)

# Chi-square statistic: sum over cells of (observed - expected)^2 / expected
chi_sq_prt = 0
for i in range(len(q)):
    chi_sq_prt = chi_sq_prt + (p[i] - q[i]) ** 2.0 / q[i]

chi_sq_prt = round(chi_sq_prt, 3)                        # practical value of chi square

# Result
# Single-argument print(...) gives identical text on Python 2 and Python 3.
print(" ".join(["Practical chi square value: ", str(chi_sq_prt)]))
# The `else` was missing: all four messages were tied to the "not rejected" condition.
if chi_sq_thr > chi_sq_prt:
    print("null hypothesis can not be rejected")
    print("data don't refute to hypothesis")
else:
    print("null hypothesis must be rejected")
    print("data refute to hypothesis")
Practical chi square value:  4.575
null hypothesis can not be rejected
data don't refute to hypothesis

Example, Page-287

In [6]:
# Variable declaration
alpha = 0.05                           # level of significance
semi = []
failures = []

# Calculation
from scipy import *
from pylab import *
%matplotlib inline

# null hypothesis: if p1=p2=p3=p4 , Alternative hypothesis if p1,p2,p3,p4 all are not equal

chi_sq_thr = 7.815                     # theoritical value of chi square at alpha = 0.05 with v = 3

l = array([[31,42,22,25],[19,8,28,25]])

semi = l[0,0:4]         # list of semiconductors
failures = l[1,0:4]     # list of failures

e11 = sum(semi)*sum(l[0:2,0]) / float(sum(semi)+sum(failures))
e12 = sum(semi)*sum(l[0:2,1]) / float(sum(semi)+sum(failures))
e13 = sum(semi)*sum(l[0:2,2]) / float(sum(semi)+sum(failures))
e14 = sum(semi)*sum(l[0:2,3]) / float(sum(semi)+sum(failures))
e21 = sum(failures)*sum(l[0:2,0]) / float(sum(semi)+sum(failures))
e22 = sum(failures)*sum(l[0:2,1]) / float(sum(semi)+sum(failures))
e23 = sum(failures)*sum(l[0:2,2]) / float(sum(semi)+sum(failures))
e24 = sum(failures)*sum(l[0:2,3]) / float(sum(semi)+sum(failures))

q = [e11,e12,e13,e14,e21,e22,e23,e24]  # list of expected frequency
p = [31,42,22,25,19,8,28,25]           # list of entries

chi_sq_prt = 0

for i in range(0,8):
    chi_sq_prt = chi_sq_prt + pow( p[i]-q[i] ,2) / q[i]

# Result
print "Practical chi square value: ",chi_sq_prt
if(chi_sq_thr > chi_sq_prt):
    print "null hypothesis can not be rejected"
    print "data dont refute to hypothesis"
    print "null hypothesis must be rejected"
    print "data refute to hypothesis"

a1 = [0.62,0.84,0.44,0.50]
a2 = [1.0,1.1,1.2,1.3]
err1 = [0.13,0.10,0.14,0.14]
err2 = [0,0,0,0]

title("Confidence intervals")
Practical chi square value:  19.5
null hypothesis must be rejected
data refute to hypothesis
<matplotlib.text.Text at 0x831eac8>

Example, Page-289

In [7]:
# Two-sample, one-sided Z test for the difference between two proportions.

# Variable declaration
n1 = 200             # sample-1 size
n2 = 400             # sample-2 size
x1 = 16              # observed count in sample 1
x2 = 14              # observed count in sample 2
alpha = 0.01         # level of significance

Z_thr = 2.33             # theoretical (critical) Z value

# Calculation
from scipy import *
from pylab import *

p = (x1+x2)/float(n1+n2)          # pooled proportion of both samples

# Test statistic: (x1/n1 - x2/n2) / sqrt(p(1-p)(1/n1 + 1/n2))
Z_prt = (x1/float(n1) - x2/float(n2)) / (sqrt( (p*(1-p))*(1.0/n1 + 1.0/n2)))        # practical (observed) value of Z

Z_prt = round(Z_prt, 2)

# Result
# Single-argument print(...) gives identical text on Python 2 and Python 3.
print(" ".join(["Practical Z value: ", str(Z_prt)]))
# The `else` was missing, and the "not rejected" branch printed the same
# conclusion as the "rejected" branch; each outcome now reports its own conclusion.
if Z_thr > Z_prt:
    print("null hypothesis can not be rejected")
    print("Proportion of tractors is not greater for first.")
else:
    print("null hypothesis must be rejected")
    print("Proportion of tractors is greater for first.")
Practical Z value:  2.38
null hypothesis must be rejected
Proportion of tractors is greater for first.

Example, Page-290

In [8]:
# Large-sample 95% confidence interval for the difference of two proportions p1 - p2.

# Variable declaration
n1 = 200             # sample-1 size
n2 = 400             # sample-2 size
p1 = 16.0/200        # sample-1 proportion
p2 = 14.0/400        # sample-2 proportion
alpha = 0.05         # level of significance; matches Z = 1.96 and the 95% interval
                     # reported below (it was previously declared as 0.01)

Z = 1.96             # Z value at alpha/2

# Calculation
from scipy import *
from pylab import *

# We need the confidence interval for p1 - p2:
#   (p1 - p2) +/- Z * sqrt( p1(1-p1)/n1 + p2(1-p2)/n2 )
half_width = Z * sqrt( (p1*(1-p1))/n1 + (p2*(1-p2))/n2 )   # computed once, used for both limits

y1 = round((p1-p2) - half_width, 3)        # Lower limit
y2 = round((p1-p2) + half_width, 3)        # Upper limit

# Result
# Single-argument print(...) gives identical text on Python 2 and Python 3.
print(" ".join(["95% confidence interval for (p1-p2) : ( ", str(y1), ",", str(y2), ")"]))
95% confidence interval for (p1-p2) : (  0.003 , 0.087 )

Example, Page-292

In [9]:
# Expected cell frequencies of a 3x3 table, rounded to two decimals.

# Calculation
from scipy import *
from pylab import *

# Variable declaration
# The table is built after the imports: array() comes from the star imports, so
# building it first only worked when an earlier cell had already been run.
l = array([[78,56,54],[15,30,31],[7,14,15]])

r1 = l[0,0:3]        # first row
r2 = l[1,0:3]        # second row
r3 = l[2,0:3]        # third row

grand_total = float(sum(r1)+sum(r2)+sum(r3))   # float keeps the divisions exact under Python 2

# Expected frequency of a cell = row total * column total / grand total
e11 = sum(r1)*sum(l[0:3,0]) / grand_total
e12 = sum(r1)*sum(l[0:3,1]) / grand_total
e13 = sum(r1)*sum(l[0:3,2]) / grand_total
e21 = sum(r2)*sum(l[0:3,0]) / grand_total
e22 = sum(r2)*sum(l[0:3,1]) / grand_total
e23 = sum(r2)*sum(l[0:3,2]) / grand_total
e31 = sum(r3)*sum(l[0:3,0]) / grand_total
e32 = sum(r3)*sum(l[0:3,1]) / grand_total
e33 = sum(r3)*sum(l[0:3,2]) / grand_total

q = [e11,e12,e13,e21,e22,e23,e31,e32,e33]                # list of expected frequencies

# Round each entry; float() turns the NumPy scalar into a plain float so the
# printed list looks the same regardless of the NumPy version.
for i in range(0,9):
    q[i] = round(float(q[i]), 2)

# Result
print(q)
[62.67, 62.67, 62.67, 25.33, 25.33, 25.33, 12.0, 12.0, 12.0]

Example, Page-294

In [10]:
# Chi-square test of independence on a 3x3 table.

# Calculation
from scipy import *
from pylab import *

# Variable declaration
# The table is built after the imports: array() comes from the star imports, so
# building it first only worked when an earlier cell had already been run.
l = array([[23,60,29],[28,79,60],[9,49,63]])
alpha = 0.01                # level of significance
n = 400                     # grand total (the nine entries of l add up to 400)
chi_sq_thr = 13.277         # theoretical (critical) value of chi square

r1 = l[0,0:3]        # first row
r2 = l[1,0:3]        # second row
r3 = l[2,0:3]        # third row

# Expected frequency of a cell = row total * column total / grand total
e11 = sum(r1)*sum(l[0:3,0]) / float(n)
e12 = sum(r1)*sum(l[0:3,1]) / float(n)
e13 = sum(r1)*sum(l[0:3,2]) / float(n)
e21 = sum(r2)*sum(l[0:3,0]) / float(n)
e22 = sum(r2)*sum(l[0:3,1]) / float(n)
e23 = sum(r2)*sum(l[0:3,2]) / float(n)
e31 = sum(r3)*sum(l[0:3,0]) / float(n)
e32 = sum(r3)*sum(l[0:3,1]) / float(n)
e33 = sum(r3)*sum(l[0:3,2]) / float(n)

q = [e11,e12,e13,e21,e22,e23,e31,e32,e33]  # list of expected frequencies
p = [23,60,29,28,79,60,9,49,63]            # list of observed entries (same order as q)

# Chi-square statistic: sum over cells of (observed - expected)^2 / expected
chi_sq_prt = 0
for i in range(len(q)):
    chi_sq_prt = chi_sq_prt + (p[i] - q[i]) ** 2 / q[i]

# Result
# Single-argument print(...) gives identical text on Python 2 and Python 3.
print(" ".join(["Practical chi square value: ", str(round(chi_sq_prt, 3))]))
# The `else` was missing, and the "not rejected" branch claimed a dependency just
# like the "rejected" branch; each outcome now reports its own conclusion.
if chi_sq_thr > chi_sq_prt:
    print("null hypothesis can not be rejected")
    print("No dependency between performance & success")
else:
    print("null hypothesis must be rejected")
    print("Dependency between performance & success")
Practical chi square value:  20.179
null hypothesis must be rejected
Dependency between performance & success

Example, Page-295

In [11]:
# Per-cell contributions (observed - expected)^2 / expected to the chi-square
# statistic of the 3x3 table.

# Calculation
from scipy import *
from pylab import *

# Variable declaration
# The table is built after the imports: array() comes from the star imports, so
# building it first only worked when an earlier cell had already been run.
l = array([[23,60,29],[28,79,60],[9,49,63]])
alpha = 0.01                # level of significance (not used in this cell)
n = 400                     # grand total (the nine entries of l add up to 400)
chi_sq_thr = 13.277         # critical value of chi square (not used in this cell)

r1 = l[0,0:3]        # first row
r2 = l[1,0:3]        # second row
r3 = l[2,0:3]        # third row

# Expected frequency of a cell = row total * column total / grand total
e11 = sum(r1)*sum(l[0:3,0]) / float(n)
e12 = sum(r1)*sum(l[0:3,1]) / float(n)
e13 = sum(r1)*sum(l[0:3,2]) / float(n)
e21 = sum(r2)*sum(l[0:3,0]) / float(n)
e22 = sum(r2)*sum(l[0:3,1]) / float(n)
e23 = sum(r2)*sum(l[0:3,2]) / float(n)
e31 = sum(r3)*sum(l[0:3,0]) / float(n)
e32 = sum(r3)*sum(l[0:3,1]) / float(n)
e33 = sum(r3)*sum(l[0:3,2]) / float(n)

q = [e11,e12,e13,e21,e22,e23,e31,e32,e33]  # list of expected frequencies
p = [23,60,29,28,79,60,9,49,63]            # list of observed entries (same order as q)
chi_sq_prt = 0                             # reset here; the total is not accumulated in this cell

# One rounded contribution per cell; float() turns the NumPy scalar into a plain
# float so the printed list looks the same regardless of the NumPy version.
r = [round(float((p[i] - q[i]) ** 2 / q[i]), 3) for i in range(len(q))]

# Result
print(r)
[2.288, 1.029, 4.32, 0.347, 0.003, 0.189, 4.613, 1.089, 6.3]

Example, Page-296

In [12]:
# Chi-square goodness-of-fit test of observed counts against expected frequencies.

# Variable declaration
alpha = 0.05                # level of significance
n = 400                     # declared with the data; not used in the calculation below
chi_sq_thr = 16.919         # theoretical (critical) value of chi square

# Calculation
from scipy import *
from pylab import *

q = [22.4,42.8,65.2,74.8,69.2,52.8,34.8,20.0,10.0,8.0]  # list of expected frequencies
p = [18,47,76,68,74,46,39,15,9,8]                       # list of observed entries (same order as q)

# Chi-square statistic: sum over classes of (observed - expected)^2 / expected
chi_sq_prt = 0
for i in range(len(q)):
    chi_sq_prt = chi_sq_prt + (p[i] - q[i]) ** 2 / q[i]

# Result
# Single-argument print(...) gives identical text on Python 2 and Python 3.
print(" ".join(["Practical chi square value: ", str(round(chi_sq_prt, 3))]))
# The `else` was missing: all four messages were tied to the "not rejected"
# condition, so a good fit was reported together with the contradictory verdict.
if chi_sq_thr > chi_sq_prt:
    print("null hypothesis can not be rejected")
    print("Poisson distribution provides a good fit at level alphha=0.05")
else:
    print("null hypothesis must be rejected")
    print("Poisson distribution does not provide a good fit at level alphha=0.05")
Practical chi square value:  6.749
null hypothesis can not be rejected
Poisson distribution provides a good fit at level alphha=0.05