Chapter 10: Inferences Concerning Proportions

Example, Page-280

In [1]:
# Variable declaration
n = 100             # total person
x = 36                 
alpha = 0.05

# Calculation
from scipy import *
from pylab import *

p = float(x) / n                # probability
Z = 1.96                        # Z value corresponding to alpha/2

val = Z*( sqrt( p*(1-p)/n ) )           
lower = p - val
upper = p + val

# Result
print "95% confidence interval: (",round(lower,3),",",round(upper,3),")"
95% confidence interval: ( 0.266 , 0.454 )

Example, page-281

In [2]:
# Variable declaration
n = 400                         # sample size
x = 136                         # number of persons answering yes

# Calculation
from scipy import *
from pylab import *

Z = 2.575                                   # Z value corresponding to alpha/2
p = float(x) / n                            # sample proportion; float() forces true division

# Maximum error Z * sqrt(p*(1-p)/n), rounded to three decimals in the same step
max_err = round(Z * sqrt(p * (1 - p) / n), 3)

# Result
print "Maximum error with 99% confidence interval: ",max_err
Maximum error with 99% confidence interval:  0.061

Example, Page-282

In [3]:
# Variable declaration
E = 0.04            # Maximum Error
alpha = 0.05
Z = 1.96            # Z value corresponding to alpha/2

# Calculation
from scipy import *
from pylab import *

# Part-A
n1 = pow((Z/E),2)/4              # Sample size
n1 = int(math.ceil(n1))

# Part-B
p = 0.12
n2 = pow((Z/E),2)*p*(1-p)              # Sample size
n2 = int(math.ceil(n2))

# Result
print "Part(A): sample size:",n1
print "Part(B): sample size:",n2
Part(A): sample size: 601
Part(B): sample size: 254

Example, Page-284

In [4]:
# Variable declaration
alpha = 0.05      # level of significance
x = 48
n = 60
p0 = 0.70

# Calculation
from scipy import *
from pylab import *

# null hypothesis: if p=0.70 , Alternative hypothesis if p>0.70
Z_thr = 1.645                   # theoritical value of Z

Z_prt = (x - n*p0)/ sqrt(n*p0*(1-p0))        # practical value of Z
Z_prt = round(Z_prt,3)

# Result
print "Practical Z value: ",Z_prt
if(Z_thr > Z_prt):
    print "null hypothesis can not be rejected"
    print "Proportion of good transceivers is not greater than 0.70"
else:
    print "null hypothesis must be rejected"
    print "Proportion of good transceivers is greater than 0.70"
Practical Z value:  1.69
null hypothesis must be rejected
Proportion of good transceivers is greater than 0.70

Example, page-286

In [5]:
# Variable declaration
alpha = 0.05      # level of significance

# Calculation
from scipy import *
from pylab import *

chi_sq_thr = 5.991                      # theoritical value of chi square at alpha = 0.05 with v = 2

l = array([[41,27,22],[79,53,78]])

crum = l[0,0:3]                         # list of crumbled
rem_int = l[1,0:3 ]                     # list of remained intact

e11 = sum(crum)*sum(l[0:2,0]) / (sum(crum)+sum(rem_int))
e12 = sum(crum)*sum(l[0:2,1]) / (sum(crum)+sum(rem_int))
e13 = sum(crum)*sum(l[0:2,2]) / (sum(crum)+sum(rem_int))
e21 = sum(rem_int)*sum(l[0:2,0]) / (sum(crum)+sum(rem_int))
e22 = sum(rem_int)*sum(l[0:2,1]) / (sum(crum)+sum(rem_int))
e23 = sum(rem_int)*sum(l[0:2,2]) / (sum(crum)+sum(rem_int))

q = [e11,e12,e13,e21,e22,e23]                            # list of expected frequency
p = [41,27,22,79,53,78]                                  # list of entries

chi_sq_prt = 0

for i in range(0,6):
    chi_sq_prt = chi_sq_prt + pow( p[i]-q[i] ,2.0) / q[i]
    


chi_sq_prt = round(chi_sq_prt,3)                         # practical value of chi square

# Result
print "Practical chi square value: ",chi_sq_prt
if(chi_sq_thr > chi_sq_prt):
    print "null hypothesis can not be rejected"
    print "data don't refute to hypothesis"
else:
    print "null hypothesis must be rejected"
    print "data refute to hypothesis"
Practical chi square value:  4.575
null hypothesis can not be rejected
data don't refute to hypothesis

Example, page-287

In [6]:
# Variable declaration
alpha = 0.05                           # level of significance
semi = []
failures = []

# Calculation
from scipy import *
from pylab import *
%matplotlib inline

# null hypothesis: if p1=p2=p3=p4 , Alternative hypothesis if p1,p2,p3,p4 all are not equal

chi_sq_thr = 7.815                     # theoritical value of chi square at alpha = 0.05 with v = 3

l = array([[31,42,22,25],[19,8,28,25]])

semi = l[0,0:4]         # list of semiconductors
failures = l[1,0:4]     # list of failures

e11 = sum(semi)*sum(l[0:2,0]) / float(sum(semi)+sum(failures))
e12 = sum(semi)*sum(l[0:2,1]) / float(sum(semi)+sum(failures))
e13 = sum(semi)*sum(l[0:2,2]) / float(sum(semi)+sum(failures))
e14 = sum(semi)*sum(l[0:2,3]) / float(sum(semi)+sum(failures))
e21 = sum(failures)*sum(l[0:2,0]) / float(sum(semi)+sum(failures))
e22 = sum(failures)*sum(l[0:2,1]) / float(sum(semi)+sum(failures))
e23 = sum(failures)*sum(l[0:2,2]) / float(sum(semi)+sum(failures))
e24 = sum(failures)*sum(l[0:2,3]) / float(sum(semi)+sum(failures))

q = [e11,e12,e13,e14,e21,e22,e23,e24]  # list of expected frequency
p = [31,42,22,25,19,8,28,25]           # list of entries

chi_sq_prt = 0

for i in range(0,8):
    chi_sq_prt = chi_sq_prt + pow( p[i]-q[i] ,2) / q[i]

# Result
print "Practical chi square value: ",chi_sq_prt
if(chi_sq_thr > chi_sq_prt):
    print "null hypothesis can not be rejected"
    print "data dont refute to hypothesis"
else:
    print "null hypothesis must be rejected"
    print "data refute to hypothesis"

a1 = [0.62,0.84,0.44,0.50]
a2 = [1.0,1.1,1.2,1.3]
err1 = [0.13,0.10,0.14,0.14]
err2 = [0,0,0,0]
ylim(0.9,1.5)

errorbar(a1,a2,xerr=err1,yerr=err2,fmt='bo')
title("Confidence intervals")
Practical chi square value:  19.5
null hypothesis must be rejected
data refute to hypothesis
Out[6]:
<matplotlib.text.Text at 0x831eac8>

Example, Page-289

In [7]:
# Variable declaration
n1 = 200             # sample-1 size
n2 = 400             # sample-2 size
x1 = 16
x2 = 14
alpha = 0.01         # level of significance

Z_thr = 2.33             # Z value

# Calculation
from scipy import *
from pylab import *

p = (x1+x2)/float(n1+n2)

Z_prt = (x1/float(n1) - x2/float(n2)) / (sqrt( (p*(1-p))*(1.0/n1 + 1.0/n2)))        # Lower limit

Z_prt = round(Z_prt,2)

# Result
print "Practical Z value: ",Z_prt
if(Z_thr > Z_prt):
    print "null hypothesis can not be rejected"
    print "Proportion of tractors is greater for first."
else:
    print "null hypothesis must be rejected"
    print "Proportion of tractors is greater for first."
Practical Z value:  2.38
null hypothesis must be rejected
Proportion of tractors is greater for first.

Example, page-290

In [8]:
# Variable declaration
n1 = 200             # sample-1 size
n2 = 400             # sample-2 size
p1 = 16.0/200
p2 = 14.0/400
alpha = 0.01         # level of significance

Z = 1.96             # Z value at alpha/2

# Calculation
from scipy import *
from pylab import *

# we need to find confidence interval for p1-p2
y1 = (p1-p2) - (Z* (sqrt( (p1*(1-p1))/n1 + (p2*(1-p2))/n2 )))        # Lower limit
y2 = (p1-p2) + (Z* (sqrt( (p1*(1-p1))/n1 + (p2*(1-p2))/n2 )))        # Upper limit

y1 = round(y1,3)
y2 = round(y2,3)

# Result
print "95% confidence interval for (p1-p2) : ( ",y1,",",y2,")"
95% confidence interval for (p1-p2) : (  0.003 , 0.087 )

Example, Page-292

In [9]:
# Variable declaration
l = array([[78,56,54],[15,30,31],[7,14,15]])

# Calculation
from scipy import *
from pylab import *

r1 = l[0,0:3]                         
r2 = l[1,0:3 ]                     
r3 = l[2,0:3 ]                     

e11 = sum(r1)*sum(l[0:3,0]) / float(sum(r1)+sum(r2)+sum(r3))
e12 = sum(r1)*sum(l[0:3,1]) / float(sum(r1)+sum(r2)+sum(r3))
e13 = sum(r1)*sum(l[0:3,2]) / float(sum(r1)+sum(r2)+sum(r3))
e21 = sum(r2)*sum(l[0:3,0]) / float(sum(r1)+sum(r2)+sum(r3))
e22 = sum(r2)*sum(l[0:3,1]) / float(sum(r1)+sum(r2)+sum(r3))
e23 = sum(r2)*sum(l[0:3,2]) / float(sum(r1)+sum(r2)+sum(r3))
e31 = sum(r3)*sum(l[0:3,0]) / float(sum(r1)+sum(r2)+sum(r3))
e32 = sum(r3)*sum(l[0:3,1]) / float(sum(r1)+sum(r2)+sum(r3))
e33 = sum(r3)*sum(l[0:3,2]) / float(sum(r1)+sum(r2)+sum(r3))


q = [e11,e12,e13,e21,e22,e23,e31,e32,e33]                # list of expected frequency

for i in range(0,9):
    q[i] = round(q[i],2)
# Result
print q
[62.67, 62.67, 62.67, 25.33, 25.33, 25.33, 12.0, 12.0, 12.0]

Example, Page-294

In [10]:
# Variable declaration
l = array([[23,60,29],[28,79,60],[9,49,63]])
alpha = 0.01
n = 400
chi_sq_thr = 13.277   

# Calculation
from scipy import *
from pylab import *

r1 = l[0,0:3]                         
r2 = l[1,0:3 ]                     
r3 = l[2,0:3 ]                     

e11 = sum(r1)*sum(l[0:3,0]) / float(n)
e12 = sum(r1)*sum(l[0:3,1]) / float(n)
e13 = sum(r1)*sum(l[0:3,2]) / float(n)
e21 = sum(r2)*sum(l[0:3,0]) / float(n)
e22 = sum(r2)*sum(l[0:3,1]) / float(n)
e23 = sum(r2)*sum(l[0:3,2]) / float(n)
e31 = sum(r3)*sum(l[0:3,0]) / float(n)
e32 = sum(r3)*sum(l[0:3,1]) / float(n)
e33 = sum(r3)*sum(l[0:3,2]) / float(n)


q = [e11,e12,e13,e21,e22,e23,e31,e32,e33]  # list of expected frequency
p = [23,60,29,28,79,60,9,49,63]            # list of entries

chi_sq_prt = 0

for i in range(0,9):
    chi_sq_prt = chi_sq_prt + pow( p[i]-q[i] ,2) / q[i]

# Result
print "Practical chi square value: ",round(chi_sq_prt,3)
if(chi_sq_thr > chi_sq_prt):
    print "null hypothesis can not be rejected"
    print "Dependency between performance & success"
else:
    print "null hypothesis must be rejected"
    print "Dependency between performance & success"
Practical chi square value:  20.179
null hypothesis must be rejected
Dependency between performance & success

Example, page-295

In [11]:
# Variable declaration
l = array([[23,60,29],[28,79,60],[9,49,63]])
alpha = 0.01
n = 400
chi_sq_thr = 13.277   

# Calculation
from scipy import *
from pylab import *

r1 = l[0,0:3]                         
r2 = l[1,0:3 ]                     
r3 = l[2,0:3 ]                     

e11 = sum(r1)*sum(l[0:3,0]) / float(n)
e12 = sum(r1)*sum(l[0:3,1]) / float(n)
e13 = sum(r1)*sum(l[0:3,2]) / float(n)
e21 = sum(r2)*sum(l[0:3,0]) / float(n)
e22 = sum(r2)*sum(l[0:3,1]) / float(n)
e23 = sum(r2)*sum(l[0:3,2]) / float(n)
e31 = sum(r3)*sum(l[0:3,0]) / float(n)
e32 = sum(r3)*sum(l[0:3,1]) / float(n)
e33 = sum(r3)*sum(l[0:3,2]) / float(n)


q = [e11,e12,e13,e21,e22,e23,e31,e32,e33]  # list of expected frequency
p = [23,60,29,28,79,60,9,49,63]            # list of entries
r = []
chi_sq_prt = 0

for i in range(0,9):
    r.append(round(pow( p[i]-q[i] ,2) / q[i],3))

# Result
print r
[2.288, 1.029, 4.32, 0.347, 0.003, 0.189, 4.613, 1.089, 6.3]

Example, Page-296

In [12]:
# Variable declaration
alpha = 0.05
n = 400
chi_sq_thr = 16.919   

# Calculation
from scipy import *
from pylab import *

q = [22.4,42.8,65.2,74.8,69.2,52.8,34.8,20.0,10.0,8.0]  # list of expected frequency
p = [18,47,76,68,74,46,39,15,9,8]            # list of entries

chi_sq_prt = 0

for i in range(0,10):
    chi_sq_prt = chi_sq_prt + pow( p[i]-q[i] ,2) / q[i]

# Result
print "Practical chi square value: ",round(chi_sq_prt,3)
if(chi_sq_thr > chi_sq_prt):
    print "null hypothesis can not be rejected"
    print "Poisson distribution provides a good fit at level alphha=0.05"
else:
    print "null hypothesis must be rejected"
    print "Poisson distribution does not provide a good fit at level alphha=0.05"
Practical chi square value:  6.749
null hypothesis can not be rejected
Poisson distribution provides a good fit at level alphha=0.05