31 std::map<std::string, std::string> &JSONCfg)
32 : placementInfo(placementInfo), timingInfo(placementInfo->getTimingInfo()), JSONCfg(JSONCfg)
34 if (
JSONCfg.find(
"PlacementTimingOptimizerVerbose") !=
JSONCfg.end())
53 int cellId = tmpCell->getCellId();
54 auto tmpCellLoc = cellLoc[cellId];
55 for (
auto tmpPin : tmpCell->getPins())
57 float pinX = tmpCellLoc.X + tmpPin->getOffsetXInCell();
58 float pinY = tmpCellLoc.Y + tmpPin->getOffsetYInCell();
59 assert((
unsigned int)tmpPin->getElementIdInType() < pinLoc.size());
60 pinLoc[tmpPin->getElementIdInType()].X = pinX;
61 pinLoc[tmpPin->getElementIdInType()].Y = pinY;
71 unsigned int srcCellId = srcCell->
getCellId();
72 auto srcNode = timingNodes[srcCellId];
73 auto srcLoc = cellLoc[srcCellId];
75 float worstSlack = 100;
78 for (
auto pinBeDriven : outputNet->getPinsBeDriven())
81 auto sinkCell = pinBeDriven->getCell();
82 unsigned int sinkCellId = sinkCell->getCellId();
83 auto sinkNode = timingNodes[sinkCellId];
84 auto sinkLoc = cellLoc[sinkCellId];
85 int succPathLen = sinkNode->getLongestPathLength();
86 float netDelay =
getDelayByModel(sinkNode, srcNode, sinkLoc.X, sinkLoc.Y, srcLoc.X, srcLoc.Y);
87 float slack = sinkNode->getRequiredArrivalTime() - srcNode->getLatestOutputArrival() - netDelay;
89 if (slack < worstSlack)
104 for (
unsigned int i = 0;
i < timingGraph->getNodes().size();
i++)
106 if (timingGraph->getNodes()[
i]->getLatestInputArrival() > maxDelay)
108 maxDelay = timingGraph->
getNodes()[
i]->getLatestInputArrival();
113 std::vector<std::vector<int>> resPaths;
114 auto resPath = timingGraph->backTraceDelayLongestPathFromNode(maxDelayId);
120 int pathNumThr,
int converThr)
126 std::vector<int> isCovered(timingGraph->getNodes().size(), 0);
127 std::vector<std::vector<int>> resPaths;
129 for (
auto curEndpoint : timingGraph->getSortedTimingEndpoints())
131 if (isCovered[curEndpoint->getId()])
133 if (curEndpoint->getLatestInputArrival() / timingGraph->getClockPeriod() < criticalRatio)
135 std::vector<int> resPath;
136 bool findSuccessfully =
137 timingGraph->backTraceDelayLongestPathFromNode(curEndpoint->getId(), isCovered, resPath, converThr);
138 if (findSuccessfully)
143 resPaths.push_back(resPath);
146 for (
auto cellId : resPath)
151 isCovered[unpackedCell->getCell()->getCellId()]++;
155 for (
auto cell : macro->getCells())
157 isCovered[cell->getCellId()]++;
164 if (resPaths.size() > pathNumThr)
171 std::vector<std::vector<int>>
178 std::vector<int> isCovered(timingGraph->getNodes().size(), 0);
179 std::vector<std::vector<int>> resPaths;
181 for (
auto curEndpoint : timingGraph->getSortedTimingEndpoints())
183 if (isCovered[curEndpoint->getId()] || !FFDirectlyDrivenButNotInOneSlot[curEndpoint->getId()])
185 if (curEndpoint->getLatestInputArrival() / timingGraph->getClockPeriod() < criticalRatio)
187 std::vector<int> resPath;
188 bool findSuccessfully =
189 timingGraph->backTraceDelayLongestPathFromNode(curEndpoint->getId(), isCovered, resPath, 100);
190 if (findSuccessfully)
196 resPaths.push_back(resPath);
197 for (
auto cellId : resPath)
202 isCovered[unpackedCell->getCell()->getCellId()] = 1;
206 for (
auto cell : macro->getCells())
208 isCovered[cell->getCellId()] = 1;
220 print_status(
"PlacementTimingOptimizer: conducting Static Timing Analysis");
222 unsigned int highFanoutThr = 10000;
226 highFanoutThr = 1000;
234 if (disableOptimisticTiming)
243 bool printOut =
false;
244 std::string dumpFileName =
"optNetDelayInfo.txt";
245 std::ofstream outfile0;
246 if (
JSONCfg.find(
"PlacementTimingOptimizer_EdgesDelayLog") !=
JSONCfg.end())
249 std::string dumpFileName =
JSONCfg[
"PlacementTimingOptimizer_EdgesDelayLog"];
250 print_status(
"PlacementTimingOptimizer: dumping PlacementTimingOptimizer_EdgesDelayLog to: " + dumpFileName);
251 outfile0.open(dumpFileName.c_str());
252 assert(outfile0.is_open() && outfile0.good() &&
253 "The path for PlacementTimingOptimizer_EdgesDelayLog does not exist and please check "
254 "your path settings");
270 auto &edges = timingGraph->getEdges();
271 int numEdges = edges.size();
273 #pragma omp parallel for
274 for (
int i = 0;
i < numEdges;
i++)
276 auto edge = edges[
i];
277 auto &pin1Loc = pinLoc[edge->getSourcePin()->getElementIdInType()];
278 auto &pin2Loc = pinLoc[edge->getSinkPin()->getElementIdInType()];
279 if (pin1Loc.X < -5 && pin1Loc.Y < -5)
281 if (pin2Loc.X < -5 && pin2Loc.Y < -5)
284 edge->setDelay(
getDelayByModel(edge->getSink(), edge->getSource(), pin1Loc.X, pin1Loc.Y, pin2Loc.X, pin2Loc.Y));
287 timingGraph->propogateArrivalTime();
288 timingGraph->backPropogateRequiredArrivalTime();
289 timingGraph->updateCriticalPath();
291 auto resPath = timingGraph->backTraceDelayLongestPathFromNode(timingGraph->getCriticalEndPoint());
293 std::cout <<
"An example of long delay path for the current placement:\n";
294 for (
auto id : resPath)
296 std::cout <<
designInfo->
getCells()[id] <<
" X:" << cellLoc[id].X <<
" Y:" << cellLoc[id].Y
297 <<
" [delay]: " << timingGraph->getNodes()[id]->getLatestInputArrival()
298 <<
" [required]: " << timingGraph->getNodes()[id]->getRequiredArrivalTime() <<
"\n";
356 if (node->getOutEdges().size() > 32)
358 for (
auto edge : node->getOutEdges())
360 outfile0 <<
" src:" << edge->getSourcePin()->getName() <<
" sink:" << edge->getSinkPin()->getName()
361 <<
" delay:" << edge->getDelay() <<
"\n";
367 return timingGraph->getCriticalPathDelay();
377 assert(cellLoc.size() == timingNodes.size());
383 std::vector<int> slackCntVec(200, 0);
384 int totalSlackChecked = 0;
390 auto designNet = curNet->getDesignNet();
391 if (designNet->checkIsPowerNet() || designNet->checkIsGlobalClock())
394 if (curNet->getDriverUnits().size() != 1 || curNet->getUnits().size() <= 1 || curNet->getUnits().size() >= 1000)
396 auto &pins = designNet->getPins();
397 int pinNum = pins.size();
399 assert(curNet->getUnits().size() == (
unsigned int)pinNum);
401 int driverPinInNet = -1;
403 for (
int i = 0;
i < pinNum;
i++)
405 if (pins[
i]->isOutputPort())
412 assert(driverPinInNet >= 0);
415 auto srcCell = pins[driverPinInNet]->getCell();
416 unsigned int srcCellId = srcCell->getCellId();
417 auto srcNode = timingNodes[srcCellId];
418 auto srcLoc = cellLoc[srcCellId];
422 for (
int pinBeDriven = 0; pinBeDriven < pinNum; pinBeDriven++)
424 if (pinBeDriven == driverPinInNet)
428 auto sinkCell = pins[pinBeDriven]->getCell();
429 unsigned int sinkCellId = sinkCell->getCellId();
430 auto sinkNode = timingNodes[sinkCellId];
431 auto sinkLoc = cellLoc[sinkCellId];
432 float netDelay =
getDelayByModel(sinkNode, srcNode, sinkLoc.X, sinkLoc.Y, srcLoc.X, srcLoc.Y);
433 float slack = sinkNode->getRequiredArrivalTime() - srcNode->getLatestOutputArrival() - netDelay;
439 int slotId = (int)(-slack / 0.1);
444 slackCntVec[slotId] += 1;
450 int slackUnderThrCnt = 0;
451 float careRatio = 0.333;
453 if (careRatio <= 0.9 && totalSlackChecked > 0)
455 for (
unsigned int i = 0;
i < slackCntVec.size();
i++)
457 slackUnderThrCnt += slackCntVec[
i];
458 if (slackUnderThrCnt > totalSlackChecked * (1 - careRatio))
464 std::string outputStr =
"";
465 for (
unsigned int i = 0;
i < slackCntVec.size();
i++)
467 outputStr +=
" " + std::to_string(slackCntVec[
i]);
469 print_info(
"Slack distribution:" + outputStr);
470 print_info(
"slackThr = " + std::to_string(slackThr));
475 float targetX,
float targetY)
477 print_status(
"PlacementTimingOptimizer: conducting incremental Static Timing Analysis");
485 auto &edges = timingGraph->getEdges();
486 int numEdges = edges.size();
488 #pragma omp parallel for
489 for (
int i = 0;
i < numEdges;
i++)
491 auto edge = edges[
i];
492 auto &pin1Loc = pinLoc[edge->getSourcePin()->getElementIdInType()];
493 auto &pin2Loc = pinLoc[edge->getSinkPin()->getElementIdInType()];
494 if (pin1Loc.X < -5 && pin1Loc.Y < -5)
496 if (pin2Loc.X < -5 && pin2Loc.Y < -5)
499 edge->setDelay(
getDelayByModel(edge->getSink(), edge->getSource(), pin1Loc.X, pin1Loc.Y, pin2Loc.X, pin2Loc.Y));
502 timingGraph->propogateArrivalTime();
503 timingGraph->backPropogateRequiredArrivalTime();
506 for (
unsigned int i = 0;
i < timingGraph->getNodes().size();
i++)
508 if (timingGraph->getNodes()[
i]->getLatestInputArrival() > maxDelay)
510 maxDelay = timingGraph->getNodes()[
i]->getLatestInputArrival();
515 auto resPath = timingGraph->backTraceDelayLongestPathFromNode(maxDelayId);
517 std::cout <<
"An example of long delay path for the current placement:\n";
518 for (
auto id : resPath)
521 <<
" [delay]: " << timingGraph->getNodes()[id]->getLatestInputArrival()
522 <<
" [required]: " << timingGraph->getNodes()[id]->getRequiredArrivalTime() <<
"\n";
532 print_warning(
"PlacementTimingOptimizer: clustering long path in one clock region");
542 PU2ClockRegionCenter.clear();
543 PU2ClockRegionColumn.clear();
545 std::set<int> extractedCellIds;
546 std::set<PlacementInfo::PlacementUnit *> extractedPUs;
548 extractedCellIds.clear();
549 extractedPUs.clear();
552 unsigned int fanoutThr = 512;
556 for (
unsigned int nodeId = 0; nodeId < timingNodes.size() * 0.1; nodeId++)
558 auto timingNode = timingNodes[nodeId];
559 if (timingNode->getLongestPathLength() > pathLenThr)
561 if (extractedCellIds.find(nodeId) != extractedCellIds.end())
563 auto candidateCellIds =
564 simpleTimingGraph->DFSFromNode(timingNode->getId(), pathLenThr, sizeThr, extractedCellIds, 1000000);
566 if (candidateCellIds.size() >= pathLenThr * 0.8)
568 std::set<PlacementInfo::PlacementUnit *> PUsInLongPaths;
569 PUsInLongPaths.clear();
571 float totalWeight = 0;
572 for (
auto &candidateCellId : candidateCellIds)
575 if (extractedPUs.find(PUInPath) == extractedPUs.end() &&
576 PUsInLongPaths.find(PUInPath) == PUsInLongPaths.end())
578 PUsInLongPaths.insert(PUInPath);
583 if (totalWeight >= pathLenThr)
585 std::map<std::pair<int, int>,
float> clockRegionYX2Cnt;
586 clockRegionYX2Cnt.clear();
588 float maxClockRegionWeight = 0;
589 float totalClockRegionWeight = 0;
590 std::pair<int, int> optClockLocYX;
592 for (
auto tmpPU : PUsInLongPaths)
596 int cellId = unpackedCell->getCell()->getCellId();
597 int clockRegionX, clockRegionY;
598 auto tmpLoc = cellLoc[cellId];
600 std::pair<int, int> tmpClockLocYX(-1, clockRegionX);
605 if (clockRegionYX2Cnt.find(tmpClockLocYX) == clockRegionYX2Cnt.end())
607 clockRegionYX2Cnt[tmpClockLocYX] = 0;
609 clockRegionYX2Cnt[tmpClockLocYX] += 1;
610 totalClockRegionWeight += 1;
612 if (clockRegionYX2Cnt[tmpClockLocYX] > maxClockRegionWeight)
614 maxClockRegionWeight = clockRegionYX2Cnt[tmpClockLocYX];
615 optClockLocYX = tmpClockLocYX;
620 for (
auto tmpCell : curMacro->getCells())
622 int cellId = tmpCell->getCellId();
623 int clockRegionX, clockRegionY;
624 auto tmpLoc = cellLoc[cellId];
627 std::pair<int, int> tmpClockLocYX(-1, clockRegionX);
628 if (clockRegionYX2Cnt.find(tmpClockLocYX) == clockRegionYX2Cnt.end())
630 clockRegionYX2Cnt[tmpClockLocYX] = 0;
632 clockRegionYX2Cnt[tmpClockLocYX] += 1;
633 totalClockRegionWeight += 1;
635 if (clockRegionYX2Cnt[tmpClockLocYX] > maxClockRegionWeight)
637 maxClockRegionWeight = clockRegionYX2Cnt[tmpClockLocYX];
638 optClockLocYX = tmpClockLocYX;
644 if ((maxClockRegionWeight > totalClockRegionWeight * clusterThrRatio) &&
645 maxClockRegionWeight >= pathLenThr * 0.333)
651 auto optClockRegion = YX2ClockRegion[0][optClockLocYX.second];
652 float cX = (optClockRegion->getLeft() + optClockRegion->getRight()) / 2;
653 std::vector<int> PUIdsInLongPaths;
654 PUIdsInLongPaths.clear();
655 bool abandon =
false;
656 for (
auto curPU : PUsInLongPaths)
658 if (curPU->checkHasDSP())
663 for (
auto curPU : PUsInLongPaths)
665 if (!curPU->isFixed() && !curPU->checkHasBRAM() && !curPU->checkHasDSP())
668 float fY = curPU->Y();
671 extractedPUs.insert(curPU);
673 PU2ClockRegionCenter[curPU] = std::pair<float, float>(fX, fY);
674 PU2ClockRegionColumn[curPU] = optClockLocYX.second;
678 int cellId = unpackedCell->getCell()->getCellId();
679 if (timingNodes[cellId]->getOutEdges().size() < fanoutThr)
680 extractedCellIds.insert(cellId);
684 for (
auto tmpCell : curMacro->getCells())
686 int cellId = tmpCell->getCellId();
687 if (timingNodes[cellId]->getOutEdges().size() < fanoutThr)
688 extractedCellIds.insert(cellId);
691 PUIdsInLongPaths.push_back(curPU->getId());
696 std::cout <<
"maxClockRegionWeight: " << maxClockRegionWeight
697 <<
" totalClockRegionWeight:" << totalClockRegionWeight
698 <<
" #extractedCellIds=" << extractedCellIds.size()
699 <<
" #extractedPUs=" << extractedPUs.size()
700 <<
" pathLength=" << timingNode->getLongestPathLength() <<
"\n";
703 else if (totalClockRegionWeight >= 20000)
705 for (
auto tmpCellId : candidateCellIds)
707 if (timingNodes[tmpCellId]->getOutEdges().size() < fanoutThr)
708 extractedCellIds.insert(tmpCellId);
715 if (extractedCellIds.size() > timingNodes.size() * 0.2 && sizeThr > 20000)
717 PU2ClockRegionCenter.clear();
718 PU2ClockRegionColumn.clear();
720 extractedCellIds.clear();
721 extractedPUs.clear();
738 int clockRegionXNum = YX2ClockRegion[0].size();
739 std::vector<int> curClockRegionX2cellNum(clockRegionXNum, 0);
740 std::vector<std::vector<PlacementInfo::PlacementUnit *>> clockRegionX2PUs(
741 clockRegionXNum, std::vector<PlacementInfo::PlacementUnit *>(0));
747 int clockRegionX, clockRegionY;
749 curClockRegionX2cellNum[clockRegionX] += PU->getWeight();
750 clockRegionX2PUs[clockRegionX].push_back(PU);
755 if (binGrid[binIdY][binIdX]->getCells().size() > 16)
757 if (PU->Y() > curClockRegionX2MaxY[clockRegionX])
759 curClockRegionX2MaxY[clockRegionX] = PU->Y();
761 if (PU->Y() < curClockRegionX2MinY[clockRegionX])
763 curClockRegionX2MinY[clockRegionX] = PU->Y();
768 std::vector<int> newClockRegionX2cellNum = curClockRegionX2cellNum;
769 for (
auto pair : PU2ClockRegionColumn)
771 auto PU = pair.first;
772 int oriClockRegionX, oriClockRegionY;
774 newClockRegionX2cellNum[oriClockRegionX] -= PU->getWeight();
775 newClockRegionX2cellNum[pair.second] += PU->getWeight();
778 for (
unsigned int colX = 0; colX < curClockRegionX2cellNum.size(); colX++)
780 float stretchRatio = (float)newClockRegionX2cellNum[colX] / (
float)curClockRegionX2cellNum[colX];
784 float completeH = topLimit - bottomLimit;
785 float oriTopY = curClockRegionX2MaxY[colX];
786 float oriBottomY = curClockRegionX2MinY[colX];
787 float oriH = std::abs(oriTopY - oriBottomY);
788 bool maybeCongestion =
789 completeH * 0.625 > oriH && (oriBottomY < 0.075 * completeH || oriTopY > 0.925 * completeH);
791 if (oriBottomY < 0.075 * completeH)
792 bottomLimit = std::max(std::max(oriBottomY, bottomLimit), (
float)0.05 * completeH);
793 if (oriTopY > 0.925 * completeH)
794 topLimit = std::min(std::min(oriTopY, topLimit), (
float)0.95 * completeH);
796 if (std::abs(oriTopY - oriBottomY) < 0.1)
798 if (stretchRatio > 1 || (maybeCongestion && stretchRatio > 0.95))
802 print_warning(
"PlacementTimingOptimizer: " + std::to_string(colX) +
803 "th column is stretched more to avoid potential congestion");
807 stretchRatio *= 1.05;
808 float newH = stretchRatio * oriH;
809 float deltaH = newH - oriH;
810 float newTopY = oriTopY + deltaH / 2;
811 float newBottomY = oriBottomY - deltaH / 2;
813 if (newBottomY < bottomLimit)
815 newTopY += (bottomLimit - newBottomY);
816 newBottomY = bottomLimit;
819 if (newTopY > topLimit)
821 newBottomY = std::max(bottomLimit, newBottomY - (newTopY - topLimit));
824 newH = newTopY - newBottomY;
827 stretchRatio = newH / oriH;
828 for (
auto PU : clockRegionX2PUs[colX])
832 float fY = (PU->Y() - oriBottomY) * stretchRatio + newBottomY;
835 PU->setAnchorLocationAndForgetTheOriginalOne(fX, fY);
838 print_warning(
"PlacementTimingOptimizer: " + std::to_string(colX) +
"th column is stretched by " +
839 std::to_string(stretchRatio) +
" oriBottomY=" + std::to_string(oriBottomY) +
840 " oriTopY=" + std::to_string(oriTopY) +
" newBottomY=" + std::to_string(newBottomY) +
841 " newTopY=" + std::to_string(newTopY));
850 std::string dumpClockRegionClustersFile =
JSONCfg[
"Dump Cluster file"] +
"-clockRegion";
851 if (dumpClockRegionClustersFile !=
"")
853 print_status(
"dumping cluster information to " + dumpClockRegionClustersFile);
854 std::ofstream outfile0((dumpClockRegionClustersFile +
".tcl").c_str());
855 assert(outfile0.is_open() && outfile0.good() &&
856 "The path for cluster result dumping does not exist and please check your path settings");
859 outfile0 <<
"highlight -color_index " << (cluster_id) % 20 + 1 <<
" [get_cells {";
865 for (
auto cell : tmpMacro->getCells())
867 outfile0 << cell->getName() <<
" ";
873 outfile0 << tmpUnpacked->getName() <<
" ";
899 assert(cellLoc.size() == timingNodes.size());
901 unsigned int highFanoutThr = 1000;
907 #pragma omp parallel for
908 for (
int PUId = 0; PUId < PUNum; PUId++)
912 for (
auto curNet : *(curPU->getNetsSetPtr()))
914 if (curNet->getDriverUnits().size() == 0)
916 if (curNet->getDriverUnits()[0] != curPU)
919 auto designNet = curNet->getDesignNet();
921 if (designNet->checkIsPowerNet() || designNet->checkIsGlobalClock())
924 if (curNet->getDriverUnits().size() != 1 || curNet->getUnits().size() <= 1 ||
925 curNet->getUnits().size() >= highFanoutThr)
927 auto &PUs = curNet->getUnits();
928 auto &pins = designNet->getPins();
929 int pinNum = pins.size();
931 assert(curNet->getUnits().size() == (
unsigned int)pinNum);
933 int driverPinInNet = -1;
935 for (
int i = 0;
i < pinNum;
i++)
937 if (pins[
i]->isOutputPort())
944 assert(driverPinInNet >= 0);
947 auto srcCell = pins[driverPinInNet]->getCell();
948 unsigned int srcCellId = srcCell->getCellId();
949 auto srcNode = timingNodes[srcCellId];
950 auto srcLoc = cellLoc[srcCellId];
952 for (
int pinBeDriven = 0; pinBeDriven < pinNum; pinBeDriven++)
954 if (pinBeDriven == driverPinInNet)
958 auto sinkCell = pins[pinBeDriven]->getCell();
959 unsigned int sinkCellId = sinkCell->getCellId();
960 auto sinkNode = timingNodes[sinkCellId];
961 auto sinkLoc = cellLoc[sinkCellId];
963 float netDelay =
getDelayByModel(sinkNode, srcNode, sinkLoc.X, sinkLoc.Y, srcLoc.X, srcLoc.Y);
964 float slack = sinkNode->getRequiredArrivalTime() - srcNode->getLatestOutputArrival() - netDelay;