diff --git a/docs/content/tools/fisher.rst b/docs/content/tools/fisher.rst index 239328dd9112e051569dd6cfac74f4f24453088f..aa771c07821cad0144acaa06628d4a7211d1843f 100644 --- a/docs/content/tools/fisher.rst +++ b/docs/content/tools/fisher.rst @@ -48,7 +48,7 @@ can do this with ``fisher`` as: #_________________________________________ # p-values for fisher's exact test left right two-tail ratio - 1.00000 0.00001 0.00001 31.667 + 1 1.3466e-05 1.3466e-05 31.667 Where we can see the constructed contingency table and the pvalues for left, right diff --git a/src/fisher/Fisher.cpp b/src/fisher/Fisher.cpp index 874da58b193c129dac4f4789898f488e36ef1867..8463e2f97b58bd3a2ec8a9a056d2753e62be736c 100644 --- a/src/fisher/Fisher.cpp +++ b/src/fisher/Fisher.cpp @@ -11,27 +11,27 @@ Fisher::Fisher(ContextFisher *context) _queryLen(0), _dbLen(0) { - _blockMgr = new BlockMgr(_context->getOverlapFraction(), _context->getReciprocal()); + _blockMgr = new BlockMgr(_context->getOverlapFraction(), _context->getReciprocal()); } Fisher::~Fisher(void) { - delete _blockMgr; - _blockMgr = NULL; + delete _blockMgr; + _blockMgr = NULL; } bool Fisher::calculate() { - if (!getFisher()) { - return false; - } + if (!getFisher()) { + return false; + } - // header - cout << "# Contingency Table" << endl; + // header + cout << "# Contingency Table" << endl; // for fisher's exact test, we need the contingency table - // XXXXXXXX | not in A | in A + // XXXXXXXX | not in A | in A // not in B | n11: in neither | n12: only in A - // in B | n21: only in B | n22: in A & B + // in B | n21: only in B | n22: in A & B // double left, right, two; @@ -46,9 +46,9 @@ bool Fisher::calculate() { long long n22 = _intersectionVal; printf("#_________________________________________\n"); - printf("# | %-12s | %-12s |\n", "not in -b", "in -b"); + printf("# | %-12s | %-12s |\n", "not in -b", "in -b"); printf("# not in -a | %-12lld | %-12lld |\n", n11, n12); - printf("# in -a | %-12lld | %-12lld |\n", n21, n22); + printf("# in -a | %-12lld | %-12lld |\n", n21, n22); printf("#_________________________________________\n"); kt_fisher_exact(n11, n12, n21, n22, &left, &right, &two); @@ -56,57 +56,55 @@ bool Fisher::calculate() { printf("# p-values for fisher's exact test\n"); printf("left\tright\ttwo-tail\tratio\n"); - printf("%.5f\t%.5f\t%.5f\t%.3f\n", left, right, two, ratio); + printf("%.5g\t%.5g\t%.5g\t%.3f\n", left, right, two, ratio); - //kt_fisher_exact(50010000, 10000000, 15000000, 3000000, &left, &right, &two); - - return true; + return true; } bool Fisher::getFisher() { - NewChromSweep sweep(_context); - if (!sweep.init()) { - return false; - } - RecordKeyVector hitSet; - while (sweep.next(hitSet)) { - if (_context->getObeySplits()) { - RecordKeyVector keySet(hitSet.getKey()); - RecordKeyVector resultSet(hitSet.getKey()); - _blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet); - _intersectionVal += getTotalIntersection(resultSet); - } else { - _intersectionVal += getTotalIntersection(hitSet); - } - } - - sweep.closeOut(); - _queryLen = sweep.getQueryTotalRecordLength(); - _dbLen = sweep.getDatabaseTotalRecordLength(); - - _unionVal = _queryLen + _dbLen; - return true; + NewChromSweep sweep(_context); + if (!sweep.init()) { + return false; + } + RecordKeyVector hitSet; + while (sweep.next(hitSet)) { + if (_context->getObeySplits()) { + RecordKeyVector keySet(hitSet.getKey()); + RecordKeyVector resultSet(hitSet.getKey()); + _blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet); + _intersectionVal += getTotalIntersection(resultSet); + } else { + _intersectionVal += getTotalIntersection(hitSet); + } + } + + sweep.closeOut(); + _queryLen = sweep.getQueryTotalRecordLength(); + _dbLen = sweep.getDatabaseTotalRecordLength(); + + _unionVal = _queryLen + _dbLen; + return true; } unsigned long Fisher::getTotalIntersection(RecordKeyVector &recList) { - unsigned long intersection = 0; - const Record *key = recList.getKey(); - int keyStart = key->getStartPos(); - int keyEnd = key->getEndPos(); - - int hitIdx = 0; - for (RecordKeyVector::const_iterator_type iter = recList.begin(); iter != recList.end(); iter = recList.next()) { - int maxStart = max((*iter)->getStartPos(), keyStart); - int minEnd = min((*iter)->getEndPos(), keyEnd); - if (_context->getObeySplits()) { - intersection += _blockMgr->getOverlapBases(hitIdx); - hitIdx++; - } else { - intersection += (unsigned long)(minEnd - maxStart); - } - } - _numIntersections += (int)recList.size(); - return intersection; + unsigned long intersection = 0; + const Record *key = recList.getKey(); + int keyStart = key->getStartPos(); + int keyEnd = key->getEndPos(); + + int hitIdx = 0; + for (RecordKeyVector::const_iterator_type iter = recList.begin(); iter != recList.end(); iter = recList.next()) { + int maxStart = max((*iter)->getStartPos(), keyStart); + int minEnd = min((*iter)->getEndPos(), keyEnd); + if (_context->getObeySplits()) { + intersection += _blockMgr->getOverlapBases(hitIdx); + hitIdx++; + } else { + intersection += (unsigned long)(minEnd - maxStart); + } + } + _numIntersections += (int)recList.size(); + return intersection; }