Package astLib :: Module astStats
[hide private]
[frames | no frames]

Source Code for Module astLib.astStats

  1  # -*- coding: utf-8 -*- 
  2  """module for performing statistical calculations. 
  3   
  4  (c) 2007-2009 Matt Hilton  
  5   
  6  U{http://astlib.sourceforge.net} 
  7   
  8  This module (as you may notice) provides very few statistical routines. It does, however, provide 
  9  biweight (robust) estimators of location and scale, as described in Beers et al. 1990 (AJ, 100, 
 10  32), in addition to a robust least squares fitting routine that uses the biweight transform. 
 11   
 12  Some routines may fail if they are passed lists with few items and encounter a `divide by zero' 
 13  error. Where this occurs, the function will return None. An error message will be printed to the 
 14  console when this happens if astStats.REPORT_ERRORS=True (the default). Testing if an 
 15  astStats function returns None can be used to handle errors in scripts.  
 16   
 17  For extensive statistics modules, the Python bindings for GNU R (U{http://rpy.sourceforge.net}), or 
 18  SciPy (U{http://www.scipy.org}) are suggested. 
 19   
 20  """ 
 21   
 22  import math 
 23  import sys 
 24   
 25  REPORT_ERRORS=True 
 26   
 27  #--------------------------------------------------------------------------------------------------- 
def mean(dataList):
    """Calculates the mean average of a list of numbers.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @rtype: float
    @return: mean average (0.0 if dataList is empty)

    """
    # Every item goes through float(), so anything float() accepts is summed
    total = sum(float(item) for item in dataList)
    if len(dataList) > 0:
        return total / float(len(dataList))

    # An empty list has no mean; 0.0 keeps the documented float return type
    return 0.0
45 46 #---------------------------------------------------------------------------------------------------
def weightedMean(dataList):
    """Calculates the weighted mean average of a two dimensional list (value, weight) of
    numbers.

    @type dataList: list
    @param dataList: input data, must be a two dimensional list in format [value, weight]
    @rtype: float
    @return: weighted mean average (0.0 if dataList is empty)

    @note: Raises ZeroDivisionError if dataList is not empty but the weights sum to zero.

    """
    weightedTotal = 0.0
    weightTotal = 0
    for item in dataList:
        # Only the first two columns of each row are read
        weightedTotal = weightedTotal + float(item[0] * item[1])
        weightTotal = weightTotal + item[1]

    if len(dataList) > 0:
        return weightedTotal / weightTotal

    # An empty list has no mean; 0.0 keeps the documented float return type
    return 0.0
67 68 #---------------------------------------------------------------------------------------------------
def stdev(dataList):
    """Calculates the (sample) standard deviation of a list of numbers.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @rtype: float
    @return: standard deviation (0.0 if dataList has fewer than two items)

    """
    nItems = len(dataList)

    # The sample standard deviation divides by (n-1), so it needs at least two items;
    # with one item that divisor is zero
    if nItems < 2:
        return 0.0

    listMean = mean(dataList)
    sumSquares = sum(float(item - listMean) * float(item - listMean) for item in dataList)

    return math.sqrt(sumSquares / (float(nItems) - 1))
87 88 #---------------------------------------------------------------------------------------------------
def rms(dataList):
    """Calculates the root mean square of a list of numbers.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @rtype: float
    @return: root mean square

    """
    # Square every value, average the squares, then take the square root
    squares = [value * value for value in dataList]
    return math.sqrt(mean(squares))
105 106 #---------------------------------------------------------------------------------------------------
def weightedStdev(dataList):
    """Calculates the weighted (sample) standard deviation of a list of numbers.

    @type dataList: list
    @param dataList: input data, must be a two dimensional list in format [value, weight]
    @rtype: float
    @return: weighted standard deviation

    @note: Returns None if an error occurs (fewer than two items, or no item with a
    weight greater than zero).

    """
    nItems = len(dataList)
    if nItems < 2:
        if REPORT_ERRORS:
            print("""ERROR: astStats.weightedStdev() : dataList contains < 2 items.""")
        return None

    listMean = weightedMean(dataList)
    sumSquares = 0
    wSum = 0
    for item in dataList:
        # Rows whose second column is zero or negative are left out of both sums
        if item[1] > 0.0:
            # NOTE(review): the second column is used as a divisor here, i.e. it is
            # treated like an error on the value rather than a weight - confirm intent
            scaledDiff = float((item[0] - listMean) / item[1])
            inverse = float(1.0 / item[1])
            sumSquares = sumSquares + scaledDiff * scaledDiff
            wSum = wSum + inverse * inverse

    if wSum == 0:
        # No row had a positive second column, so the ratio below is undefined
        if REPORT_ERRORS:
            print("""ERROR: astStats.weightedStdev() : divide by zero error.""")
        return None

    # n/(n-1) factor uses the full list length, including rows skipped above
    nFactor = float(nItems) / float(nItems - 1)
    return math.sqrt(nFactor * (sumSquares / wSum))
135 136 #---------------------------------------------------------------------------------------------------
def median(dataList):
    """Calculates the median of a list of numbers.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list; it is not modified
    @rtype: float
    @return: median average: the middle item for an odd number of items, the mean of the
    two middle items for an even number

    @note: Raises IndexError if dataList is empty.

    """
    # Sort a copy so the caller's list keeps its order
    ordered = sorted(dataList)
    nItems = len(ordered)
    middle = nItems // 2

    if nItems % 2 == 1:
        # Odd number of items: the middle one is the median
        return ordered[middle]

    # Even number of items: average the two either side of the midpoint
    # (for an empty list ordered[-1] raises IndexError)
    return (ordered[middle - 1] + ordered[middle]) / 2.0
165 166 #---------------------------------------------------------------------------------------------------
def modeEstimate(dataList):
    """Returns an estimate of the mode of a set of values by mode=(3*median)-(2*mean).

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @rtype: float
    @return: estimate of mode average

    """
    # median() is evaluated before mean(), as in the formula above
    medianTerm = 3 * median(dataList)
    meanTerm = 2 * mean(dataList)
    return medianTerm - meanTerm
179 180 #---------------------------------------------------------------------------------------------------
def MAD(dataList):
    """Calculates the Median Absolute Deviation of a list of numbers.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list; it is not modified
    @rtype: float
    @return: median absolute deviation, i.e. the median of |x - median(x)|

    @note: Raises IndexError if dataList is empty.

    """
    def middleValue(values):
        # Median of values, taken from a sorted copy: the middle item for an odd count,
        # the mean of the two middle items for an even count
        ordered = sorted(values)
        half = len(ordered) // 2
        if len(ordered) % 2 == 1:
            return ordered[half]
        return (ordered[half - 1] + ordered[half]) / 2.0

    listMedian = middleValue(dataList)

    # Calculate |x-M| values and take their median
    diffModuli = [math.fabs(item - listMedian) for item in dataList]
    return middleValue(diffModuli)
216 217 #---------------------------------------------------------------------------------------------------
def biweightLocation(dataList, tuningConstant):
    """Calculates the biweight location estimator (like a robust average) of a list of
    numbers.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @type tuningConstant: float
    @param tuningConstant: 6.0 is recommended.
    @rtype: float
    @return: biweight location

    @note: Returns None if an error occurs (MAD() returns 0, or no item lies within
    tuningConstant*MAD of the median).

    """
    C = tuningConstant
    listMedian = median(dataList)
    listMAD = MAD(dataList)

    if listMAD == 0:
        if REPORT_ERRORS:
            print("""ERROR: astStats: biweightLocation() : MAD() returned 0.""")
        return None

    top = 0     # numerator equation (5) Beers et al if you like
    bottom = 0  # denominator
    for item in dataList:
        u = (item - listMedian) / (C * listMAD)
        # Only items with |u| <= 1 contribute to either sum
        if math.fabs(u) <= 1.0:
            oneMinusU2 = 1.0 - (u * u)
            top = top + ((item - listMedian) * oneMinusU2 * oneMinusU2)
            bottom = bottom + (oneMinusU2 * oneMinusU2)

    if bottom == 0:
        # Nothing contributed to the denominator, so the ratio below is undefined
        if REPORT_ERRORS:
            print("""ERROR: astStats: biweightLocation() : divide by zero error.""")
        return None

    return listMedian + (top / bottom)
259 260 #---------------------------------------------------------------------------------------------------
def biweightScale(dataList, tuningConstant):
    """Calculates the biweight scale estimator (like a robust standard deviation) of a list
    of numbers.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @type tuningConstant: float
    @param tuningConstant: 9.0 is recommended.
    @rtype: float
    @return: biweight scale

    @note: Returns None if an error occurs (tuningConstant*MAD is zero, or the
    denominator of the estimator is zero).

    """
    C = tuningConstant

    listMedian = median(dataList)
    listMAD = MAD(dataList)

    # The divisor of every u value is the same, so check it once up front
    uScale = C * listMAD
    if uScale == 0:
        if REPORT_ERRORS:
            print("""ERROR: astStats.biweightScale() : divide by zero error.""")
        return None

    top = 0       # numerator equation (9) Beers et al
    bottom = 0
    valCount = 0  # Count values where |u|<=1 only

    for item in dataList:
        u = (item - listMedian) / uScale
        # Skip u values >1
        if math.fabs(u) <= 1.0:
            diffModulus = math.fabs(item - listMedian)
            u2Term = 1.0 - (u * u)
            u4Term = math.pow(u2Term, 4)
            top = top + ((diffModulus * diffModulus) * u4Term)
            bottom = bottom + (u2Term * (1.0 - (5.0 * (u * u))))
            valCount = valCount + 1

    top = math.sqrt(top)
    bottom = math.fabs(bottom)

    if bottom == 0:
        # The denominator terms can cancel (or nothing was counted); no scale is defined
        if REPORT_ERRORS:
            print("""ERROR: astStats.biweightScale() : divide by zero error.""")
        return None

    return math.pow(float(valCount), 0.5) * (top / bottom)
310 311 #---------------------------------------------------------------------------------------------------
def biweightClipped(dataList, tuningConstant, sigmaCut):
    """Iteratively calculates biweight location and scale, using sigma clipping, for a list
    of values. The calculation is performed on the first column of a multi-dimensional
    list; other columns are ignored.

    @type dataList: list
    @param dataList: input data; each entry is either a number or a list/tuple whose first
    item is the value to use
    @type tuningConstant: float
    @param tuningConstant: 6.0 is recommended for location estimates, 9.0 is recommended for
    scale estimates
    @type sigmaCut: float
    @param sigmaCut: sigma clipping to apply
    @rtype: dictionary
    @return: estimate of biweight location, scale, and list of non-clipped data, in the format
    {'biweightLocation', 'biweightScale', 'dataList'}

    @note: Returns None if an error occurs (fewer than 6 values supplied, or either
    biweight routine returns None).

    """
    def firstColumn(row):
        # Value used for the statistics: first item of a row, or the entry itself
        if isinstance(row, (list, tuple)):
            return row[0]
        return row

    clippedValues = [firstColumn(row) for row in dataList]
    cbi = None
    sbi = None
    clippedData = []

    iterations = 0
    while iterations < 11 and len(clippedValues) > 5:

        cbi = biweightLocation(clippedValues, tuningConstant)
        sbi = biweightScale(clippedValues, tuningConstant)

        # check for either biweight routine falling over
        # happens when feed in lots of similar numbers
        # e.g. when bootstrapping with a small sample
        if cbi is None or sbi is None:
            if REPORT_ERRORS:
                print("""ERROR: astStats : biweightClipped() : divide by zero error.""")
            return None

        # Each pass re-selects from the full input, not from the previous selection
        lower = cbi - (sigmaCut * sbi)
        upper = cbi + (sigmaCut * sbi)
        clippedData = [row for row in dataList
                       if firstColumn(row) > lower and firstColumn(row) < upper]
        clippedValues = [firstColumn(row) for row in clippedData]

        iterations = iterations + 1

    if cbi is None:
        # The loop never ran, so there is no estimate to report
        if REPORT_ERRORS:
            print("""ERROR: astStats : biweightClipped() : dataList contains < 6 items.""")
        return None

    return {'biweightLocation': cbi,
            'biweightScale': sbi,
            'dataList': clippedData}
377 378 #---------------------------------------------------------------------------------------------------
def biweightTransform(dataList, tuningConstant):
    """Calculates the biweight transform for a set of values. Useful for using as weights in
    robust line fitting.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @type tuningConstant: float
    @param tuningConstant: 6.0 is recommended for location estimates, 9.0 is recommended for
    scale estimates
    @rtype: list
    @return: list of [value, biweight] pairs, one per input item and in the same order as
    dataList

    """
    C = tuningConstant

    # Hand median() a copy so the pairs come back in the caller's order, and the caller's
    # list is left alone, even if median() reorders its argument
    listMedian = abs(median(list(dataList)))

    # Values at or beyond C*|median| get zero weight
    cutoff = C * listMedian

    biweights = []
    for item in dataList:
        if abs(item) < cutoff:
            # float() keeps this a true division for integer input
            ratio = item / float(cutoff)
            biweights.append([item, (1.0 - (ratio * ratio)) * (1.0 - (ratio * ratio))])
        else:
            biweights.append([item, 0.0])

    return biweights
407 408 #---------------------------------------------------------------------------------------------------
def OLSFit(dataList):
    """Performs an ordinary least squares fit on a two dimensional list of numbers.
    Minimum number of data points is 5.

    @type dataList: list
    @param dataList: input data, must be a two dimensional list in format [x, y]
    @rtype: dictionary
    @return: slope and intercept on y-axis, with associated errors, in the format
    {'slope', 'intercept', 'slopeError', 'interceptError'}

    @note: Returns None if an error occurs (fewer than 5 points, or all x values equal).

    """
    n = float(len(dataList))
    if n <= 4:
        if REPORT_ERRORS:
            print("""ERROR: astStats.OLSFit() : dataList contains < 5 items.""")
        return None

    sumX = 0
    sumY = 0
    sumXY = 0
    sumXX = 0
    for item in dataList:
        sumX = sumX + item[0]
        sumY = sumY + item[1]
        sumXY = sumXY + (item[0] * item[1])
        sumXX = sumXX + (item[0] * item[0])

    # Shared denominator of slope, intercept and both errors; it is zero when every x is
    # the same, and its square root is taken below, so it must be strictly positive
    denominator = (n * sumXX) - (sumX * sumX)
    if denominator <= 0:
        if REPORT_ERRORS:
            print("""ERROR: astStats.OLSFit() : divide by zero error.""")
        return None

    m = ((n * sumXY) - (sumX * sumY)) / denominator
    c = ((sumXX * sumY) - (sumX * sumXY)) / denominator

    # Sum of squared residuals about the fitted line
    sumRes = 0
    for item in dataList:
        residual = item[1] - (m * item[0]) - c
        sumRes = sumRes + (residual * residual)

    sigma = math.sqrt((1.0 / (n - 2)) * sumRes)

    mSigma = (sigma * math.sqrt(n)) / math.sqrt(denominator)
    cSigma = (sigma * math.sqrt(sumXX)) / math.sqrt(denominator)

    return {'slope': m,
            'intercept': c,
            'slopeError': mSigma,
            'interceptError': cSigma}
456 457 #---------------------------------------------------------------------------------------------------
def clippedMeanStdev(dataList, sigmaCut = 3.0, maxIterations = 10.0):
    """Calculates the clipped mean and stdev of a list of numbers.

    On each pass, values further than sigmaCut standard deviations from the current mean
    are dropped. Clipping stops when a pass removes nothing, when maxIterations passes
    have been made, or when 4 or fewer values remain. dataList itself is not modified.

    @type dataList: list
    @param dataList: input data, one dimensional list of numbers
    @type sigmaCut: float
    @param sigmaCut: clipping in Gaussian sigma to apply
    @type maxIterations: int
    @param maxIterations: maximum number of iterations
    @rtype: dictionary
    @return: format {'clippedMean', 'clippedStdev', 'numPoints'}; the mean and stdev are
    those of the numPoints values that survived the clipping

    """
    def meanAndStdev(values):
        # Mean and stdev of values; the stdev of fewer than two values is taken as 0.0
        if len(values) > 1:
            return mean(values), stdev(values)
        return mean(values), 0.0

    listCopy = list(dataList)

    iterations = 0
    while iterations < maxIterations and len(listCopy) > 4:

        m, s = meanAndStdev(listCopy)

        # Keep values within sigmaCut*s of the mean; <= so that a list of identical
        # values (s == 0) is not clipped away entirely
        newCopy = [value for value in listCopy if abs(value - m) <= sigmaCut * s]

        if len(newCopy) == len(listCopy):
            # Nothing was clipped, so further passes would not change anything
            break

        listCopy = newCopy
        iterations = iterations + 1

    # Report statistics for exactly the values counted in numPoints
    m, s = meanAndStdev(listCopy)

    return {'clippedMean': m, 'clippedStdev': s, 'numPoints': len(listCopy)}
491 492 #---------------------------------------------------------------------------------------------------
def clippedWeightedLSFit(dataList, sigmaCut):
    """Performs a weighted least squares fit on a list of numbers with sigma clipping. Minimum number of data
    points is 5.

    @type dataList: list
    @param dataList: input data, must be a three dimensional list in format [x, y, y error]
    @type sigmaCut: float
    @param sigmaCut: points whose residual from the fitted line exceeds sigmaCut times their
    y error are clipped
    @rtype: dictionary
    @return: slope and intercept on y-axis, with associated errors, in the format
    {'slope', 'intercept', 'slopeError', 'interceptError'}, plus 'numDataPoints', the
    number of points that passed the final clipping step

    @note: Returns None if an error occurs (fewer than 5 points, or the fit fails).

    """
    clippedValues = list(dataList)
    fitResults = None

    iterations = 0
    while iterations < 11 and len(clippedValues) > 4:

        # Treat a division by zero inside the fit the same as a fit that returns None
        try:
            fitResults = weightedLSFit(clippedValues, "errors")
        except ZeroDivisionError:
            fitResults = None

        if fitResults is None or fitResults['slope'] is None:
            if REPORT_ERRORS:
                print("""ERROR: astStats : clippedWeightedLSFit() : divide by zero error.""")
            return None

        # Trim points more than sigmaCut*sigma away from the fitted line; each pass
        # re-selects from the full input, not from the previous selection
        clippedValues = []
        for row in dataList:
            fit = fitResults['slope'] * row[0] + fitResults['intercept']
            res = row[1] - fit
            if abs(res) / row[2] < sigmaCut:
                clippedValues.append(row)

        iterations = iterations + 1

    if fitResults is None:
        # The loop never ran, so there is no fit to report
        if REPORT_ERRORS:
            print("""ERROR: astStats : clippedWeightedLSFit() : dataList contains < 5 items.""")
        return None

    # store the number of values that made it through the clipping process
    # (the fit itself was made before the last clipping step)
    fitResults['numDataPoints'] = len(clippedValues)

    return fitResults
541 542 #---------------------------------------------------------------------------------------------------
def weightedLSFit(dataList, weightType):
    """Performs a weighted least squares fit on a three dimensional list of numbers [x, y, y error].

    @type dataList: list
    @param dataList: input data, must be a three dimensional list in format [x, y, y error]
    @type weightType: string
    @param weightType: if "errors", weights are calculated assuming the input data is in the
    format [x, y, error on y]; if "weights", the weights are assumed to be already calculated and
    stored in a fourth column [x, y, error on y, weight] (as used by e.g. L{astStats.biweightLSFit})
    @rtype: dictionary
    @return: slope and intercept on y-axis, with associated errors, in the format
    {'slope', 'intercept', 'slopeError', 'interceptError'}

    @note: Returns None if an error occurs (a divide by zero, or fewer than 5 points when
    weightType is "weights"). Raises ValueError if weightType is neither "weights" nor
    "errors".

    """
    if weightType == "weights":
        n = float(len(dataList))
        if n <= 4:
            if REPORT_ERRORS:
                print("""ERROR: astStats.weightedLSFit() : dataList contains < 5 items.""")
            return None

        # Float accumulators so that integer input cannot trigger integer division
        sumW = 0.0
        sumWX = 0.0
        sumWY = 0.0
        sumWXY = 0.0
        sumWXX = 0.0
        for item in dataList:
            W = item[3]
            sumWX = sumWX + (W * item[0])
            sumWY = sumWY + (W * item[1])
            sumWXY = sumWXY + (W * item[0] * item[1])
            sumWXX = sumWXX + (W * item[0] * item[0])
            sumW = sumW + W

        denominator = (sumW * sumWXX) - (sumWX * sumWX)
        if denominator == 0:
            if REPORT_ERRORS:
                print("ERROR: astStats.weightedLSFit() : divide by zero error.")
            return None

        m = ((sumW * sumWXY) - (sumWX * sumWY)) / denominator
        c = ((sumWXX * sumWY) - (sumWX * sumWXY)) / denominator

        # Sum of squared (unweighted) residuals about the fitted line
        sumRes = 0
        for item in dataList:
            residual = item[1] - (m * item[0]) - c
            sumRes = sumRes + (residual * residual)

        sigma = math.sqrt((1.0 / (n - 2)) * sumRes)

        # Can get div0 errors here so check
        # When biweight fitting converges this shouldn't happen
        errorDenominator = (n * sumWXX) - (sumWX * sumWX)
        if not errorDenominator > 0.0:
            if REPORT_ERRORS:
                print("""ERROR: astStats.weightedLSFit() : divide by zero error.""")
            return None

        mSigma = (sigma * math.sqrt(n)) / math.sqrt(errorDenominator)
        cSigma = (sigma * math.sqrt(sumWXX)) / math.sqrt(errorDenominator)

    elif weightType == "errors":
        sumX = 0
        sumY = 0
        sumXY = 0
        sumXX = 0
        sumSigma = 0
        try:
            for item in dataList:
                # Each point is weighted by 1/error^2; float() keeps the divisions true
                # divisions for integer input
                variance = float(item[2] * item[2])
                sumX = sumX + (item[0] / variance)
                sumY = sumY + (item[1] / variance)
                sumXY = sumXY + ((item[0] * item[1]) / variance)
                sumXX = sumXX + ((item[0] * item[0]) / variance)
                sumSigma = sumSigma + (1.0 / variance)
        except ZeroDivisionError:
            # A point with zero error has infinite weight
            if REPORT_ERRORS:
                print("ERROR: astStats.weightedLSFit() : divide by zero error.")
            return None

        # delta is divided by and appears under a square root, so it must be positive
        delta = (sumSigma * sumXX) - (sumX * sumX)
        if not delta > 0:
            if REPORT_ERRORS:
                print("ERROR: astStats.weightedLSFit() : divide by zero error.")
            return None

        m = ((sumSigma * sumXY) - (sumX * sumY)) / delta
        c = ((sumXX * sumY) - (sumX * sumXY)) / delta
        mSigma = math.sqrt(sumSigma / delta)
        cSigma = math.sqrt(sumXX / delta)

    else:
        raise ValueError("weightType must be \"weights\" or \"errors\"")

    return {'slope': m,
            'intercept': c,
            'slopeError': mSigma,
            'interceptError': cSigma}
646 647 #---------------------------------------------------------------------------------------------------
def biweightLSFit(dataList, tuningConstant, sigmaCut = None):
    """Performs a weighted least squares fit, where the weights used are the biweight
    transforms of the residuals to the previous best fit .i.e. the procedure is iterative,
    and converges very quickly (the number of iterations is fixed at 10). At least 5 data
    points are needed for the initial unweighted fit, and the biweight re-weighting is only
    applied while more than 5 points remain.

    This seems to give slightly different results to the equivalent R routine, so use at your
    own risk!

    @type dataList: list
    @param dataList: input data, a list of rows in format [x, y, ...]; only x and y enter
    the fit, a third column (if present) is passed through untouched
    @type tuningConstant: float
    @param tuningConstant: 6.0 is recommended for location estimates, 9.0 is recommended for
    scale estimates
    @type sigmaCut: float
    @param sigmaCut: sigma clipping to apply (set to None if not required)
    @rtype: dictionary
    @return: slope and intercept on y-axis, with associated errors, in the format
    {'slope', 'intercept', 'slopeError', 'interceptError'}

    @note: Returns None if an error occurs (either the initial OLSFit() or a later
    weightedLSFit() returns None).

    """
    dataCopy = list(dataList)

    # First perform unweighted fit, then calculate residuals
    results = OLSFit(dataCopy)
    if results is None:
        return None

    for iteration in range(10):
        m = results['slope']
        c = results['intercept']
        res = [(m * item[0] + c) - item[1] for item in dataCopy]

        if len(res) <= 5:
            # Too few points left to re-weight; keep the most recent fit
            break

        # For clipping, trim away points whose |residual| exceeds sigmaCut times the
        # standard deviation of the |residuals|, but never go below 2 points
        if sigmaCut is not None:
            sigma = stdev([abs(r) for r in res])
            keptData = []
            keptRes = []
            remaining = len(dataCopy)
            for row, r in zip(dataCopy, res):
                if abs(r) > (sigmaCut * sigma) and remaining > 2:
                    remaining = remaining - 1
                else:
                    keptData.append(row)
                    keptRes.append(r)
            dataCopy = keptData
            res = keptRes

        # Biweight transform residuals. The transform returns [value, weight] pairs; the
        # weights are looked up by residual value so that the pairing with dataCopy does
        # not depend on the order in which the pairs come back
        weightOf = {}
        for pair in biweightTransform(list(res), tuningConstant):
            weightOf[pair[0]] = pair[1]

        # Perform weighted fit, using biweight transforms of residuals as weight.
        # weightedLSFit() reads the weight from the fourth column in "weights" mode
        wData = []
        for row, r in zip(dataCopy, res):
            if len(row) > 2:
                thirdColumn = row[2]
            else:
                thirdColumn = None
            wData.append([row[0], row[1], thirdColumn, weightOf[r]])

        results = weightedLSFit(wData, "weights")
        if results is None:
            return None

    return {'slope': results['slope'],
            'intercept': results['intercept'],
            'slopeError': results['slopeError'],
            'interceptError': results['interceptError']}
724 725 #---------------------------------------------------------------------------------------------------
def cumulativeBinner(data, binMin, binMax, binTotal):
    """Bins the input data cumulatively: the frequency recorded for each bin is the fraction
    of all items in data that are greater than the lower edge of that bin.

    @type data: list
    @param data: input data, must be a one dimensional list
    @type binMin: float
    @param binMin: minimum value from which to bin data
    @type binMax: float
    @param binMax: maximum value from which to bin data
    @type binTotal: int
    @param binTotal: number of bins
    @rtype: list
    @return: binned data, in format [bin centre, frequency]

    """
    binStep = float(binMax - binMin) / binTotal
    totalItems = len(data)

    coords = []
    for i in range(binTotal):
        lowerEdge = binMin + (i * binStep)
        count = sum(1 for item in data if item > lowerEdge)

        # Divide once per bin rather than adding 1/totalItems per item, which accumulates
        # rounding error; with no data every frequency is 0
        if totalItems > 0:
            frequency = count / float(totalItems)
        else:
            frequency = 0

        # Gnuplot requires points at bin midpoints
        coords.append([binMin + (float(i + 0.5) * binStep), frequency])

    return coords
756 757 #---------------------------------------------------------------------------------------------------
def binner(data, binMin, binMax, binTotal):
    """Bins the input data. An item falls in a bin if it is greater than the bin's lower edge
    and less than or equal to its upper edge, so an item equal to binMin is not counted.

    @type data: list
    @param data: input data, must be a one dimensional list
    @type binMin: float
    @param binMin: minimum value from which to bin data
    @type binMax: float
    @param binMax: maximum value from which to bin data
    @type binTotal: int
    @param binTotal: number of bins
    @rtype: list
    @return: binned data, in format [bin centre, frequency]

    """
    binStep = float(binMax - binMin) / binTotal

    coords = []
    for i in range(binTotal):
        # Edges are the same for every item, so work them out once per bin
        lowerEdge = binMin + (i * binStep)
        upperEdge = binMin + ((i + 1) * binStep)
        count = sum(1 for item in data if item > lowerEdge and item <= upperEdge)

        # Gnuplot requires points at bin midpoints
        coords.append([binMin + (float(i + 0.5) * binStep), count])

    return coords
788 789 #---------------------------------------------------------------------------------------------------
def weightedBinner(data, weights, binMin, binMax, binTotal):
    """Bins the input data, recorded frequency is sum of weights in bin. An item falls in a
    bin if it is greater than the bin's lower edge and less than or equal to its upper edge.

    @type data: list
    @param data: input data, must be a one dimensional list
    @type weights: list
    @param weights: weight of each item in data, in the same order; items are paired with
    weights using zip(), so any surplus entries in the longer list are ignored
    @type binMin: float
    @param binMin: minimum value from which to bin data
    @type binMax: float
    @param binMax: maximum value from which to bin data
    @type binTotal: int
    @param binTotal: number of bins
    @rtype: list
    @return: binned data, in format [bin centre, frequency]

    """
    binStep = float(binMax - binMin) / binTotal

    # Pair items with weights once; the pairs are scanned again for every bin
    weightedItems = list(zip(data, weights))

    coords = []
    for i in range(binTotal):
        # Edges are the same for every item, so work them out once per bin
        lowerEdge = binMin + (i * binStep)
        upperEdge = binMin + ((i + 1) * binStep)
        binWeight = sum((weight for item, weight in weightedItems
                         if item > lowerEdge and item <= upperEdge), 0.0)

        # Gnuplot requires points at bin midpoints
        coords.append([binMin + (float(i + 0.5) * binStep), binWeight])

    return coords
820 821 #--------------------------------------------------------------------------------------------------- 822