AMF-Placer  2.0
An Open-Source Timing-driven Analytical Mixed-size FPGA Placer
ParallelCLBPacker_PackingCLBSite.cc
Go to the documentation of this file.
1 
26 #include "ParallelCLBPacker.h"
27 
29 {
30  std::sort(priorityQueue.begin(), priorityQueue.end(), [](PackingCLBCluster *a, PackingCLBCluster *b) -> bool {
31  return (a->getScoreInSite() == b->getScoreInSite()) ? a->getHash() > b->getHash()
32  : (a->getScoreInSite() > b->getScoreInSite());
33  });
34  for (unsigned int i = 1; i < priorityQueue.size(); i++)
35  {
36  if (!(priorityQueue[i - 1]->getScoreInSite() >= priorityQueue[i]->getScoreInSite()))
37  {
38  std::cout << "priorityQueue[" << i - 1 << "]:\n" << priorityQueue[i - 1] << "\n";
39  std::cout << "priorityQueue[i]:\n" << priorityQueue[i] << "\n";
40  for (unsigned int j = 0; j < priorityQueue.size(); j++)
41  {
42  std::cout << "#" << j << ": " << priorityQueue[j]->getScoreInSite() << "\n";
43  }
44  }
45  assert(priorityQueue[i - 1]->getScoreInSite() >= priorityQueue[i]->getScoreInSite());
46  }
47 }
48 
50 {
51  int validCnt = 0;
52  for (unsigned int i = 0; i < priorityQueue.size(); i++)
53  {
54  if (priorityQueue[i]->areAllPUsValidForThisSite(PUId2PackingCLBSite, this))
55  {
56  priorityQueue[validCnt] = priorityQueue[i];
57  validCnt++;
58  }
59  else
60  {
61  delete priorityQueue[i];
62  priorityQueue[i] = nullptr;
63  }
64  }
65  if (determinedClusterInSite)
66  assert(determinedClusterInSite->areAllPUsValidForThisSite(PUId2PackingCLBSite, this));
67  priorityQueue.resize(validCnt);
68 }
69 
71 {
72  if (determinedClusterInSite)
73  {
74  int validCnt = 0;
75  for (unsigned int i = 0; i < priorityQueue.size(); i++)
76  {
77  bool changed = false;
78  for (auto tmpPU : determinedClusterInSite->getPUs())
79  {
80  if (!priorityQueue[i]->contains(tmpPU))
81  {
82  if (!priorityQueue[i]->addPU(tmpPU, true))
83  {
84  delete priorityQueue[i];
85  priorityQueue[i] = nullptr;
86  break;
87  }
88  else
89  {
90  changed = true;
91  }
92  }
93  }
94  if (priorityQueue[i])
95  {
96  if (changed)
97  {
98  priorityQueue[i]->refreshId();
99  priorityQueue[i]->updateScoreInSite();
100  }
101  priorityQueue[validCnt] = priorityQueue[i];
102  validCnt++;
103  }
104  }
105  priorityQueue.resize(validCnt);
106  }
107 }
108 
110 {
111  std::set<PlacementInfo::PlacementUnit *, Packing_PUcompare> newNbr;
112  newNbr.clear();
113  for (auto tmpPU : neighborPUs)
114  {
115  bool PUValid = true;
116  if (PUId2PackingCLBSite[tmpPU->getId()])
117  {
118  if (PUId2PackingCLBSite[tmpPU->getId()] != this)
119  {
120  PUValid = false;
121  }
122  }
123  if (PUValid)
124  {
125  bool detCompatible = true;
126  if (determinedClusterInSite)
127  {
128  if (!determinedClusterInSite->contains(tmpPU))
129  {
130  PackingCLBCluster *testDetCluster = new PackingCLBCluster(determinedClusterInSite);
131  detCompatible = (testDetCluster->addPU(tmpPU, true));
132  delete testDetCluster;
133  }
134  }
135  if (detCompatible)
136  newNbr.insert(tmpPU);
137  }
138  }
139  neighborPUs = newNbr;
140 }
141 
143 {
144  if (priorityQueue.size()) // if there are possible condidates
145  {
146  PackingCLBCluster *topCluster = priorityQueue[0];
147  std::vector<PlacementInfo::PlacementUnit *> countedPUs;
148  countedPUs.clear();
149  for (auto pair : PU2TopCnt)
150  countedPUs.push_back(pair.first);
151 
152  std::vector<PlacementInfo::PlacementUnit *> PUsToAdd;
153  PUsToAdd.clear();
154  for (auto PU : countedPUs)
155  {
156  if (topCluster->contains(PU) && PUId2PackingCLBSite[PU->getId()] == this)
157  {
158  assert(PU2TopCnt.find(PU) != PU2TopCnt.end());
159  PU2TopCnt[PU]++;
160  if (PU2TopCnt[PU] >= unchangedIterationThr)
161  {
162  if (determinedClusterInSite) // if there is determinedClusterInSite, only add PUs not in in
163  {
164  if (!determinedClusterInSite->contains(PU))
165  {
166  PUsToAdd.push_back(PU);
167  }
168  }
169  else // if there is no determinedClusterInSite yet, all PUs meeting requirement can add
170  {
171  PUsToAdd.push_back(PU);
172  }
173  }
174  }
175  else
176  {
177  PU2TopCnt.erase(PU); // erase the records of the PUs not in top
178  }
179  }
180 
181  // add the PUs into record if they are not recorded yet
182  for (auto PU : topCluster->getPUs())
183  {
184  if (PU2TopCnt.find(PU) == PU2TopCnt.end())
185  {
186  PU2TopCnt[PU] = 1;
187  }
188  }
189 
190  if (PUsToAdd.size()) // some new consistent PUs found in top
191  {
192 
193  // remove candidates in PQ which is not compatible with added PUs
194  int validCnt = 0;
195  for (unsigned int i = 0; i < priorityQueue.size(); i++)
196  {
197  bool changed = false;
198  for (auto tmpPU : PUsToAdd)
199  {
200  if (!priorityQueue[i]->contains(tmpPU))
201  {
202  if (!priorityQueue[i]->addPU(tmpPU, true))
203  {
204  delete priorityQueue[i];
205  priorityQueue[i] = nullptr;
206  break;
207  }
208  else
209  {
210  changed = true;
211  }
212  }
213  }
214  if (priorityQueue[i])
215  {
216  if (changed)
217  {
218  priorityQueue[i]->refreshId();
219  priorityQueue[i]->updateScoreInSite();
220  }
221  priorityQueue[validCnt] = priorityQueue[i];
222  validCnt++;
223  }
224  }
225  priorityQueue.resize(validCnt);
226 
227  if (!determinedClusterInSite) // if there is determinedClusterInSite
228  {
229  delete determinedClusterInSite;
230  // priorityQueue.push_back(determinedClusterInSite);
231  }
232 
233  determinedClusterInSite = new PackingCLBCluster(topCluster);
234 
235  for (auto countedPair : PU2TopCnt)
236  {
237  if (countedPair.second < unchangedIterationThr ||
238  !placementInfo->checkClockColumnLegalization(countedPair.first, CLBSite))
239  {
240  determinedClusterInSite->removePUToConstructDetCluster(countedPair.first);
241  }
242  else
243  {
244  placementInfo->addPUIntoClockColumn(countedPair.first, CLBSite);
245  }
246  }
247 
248  determinedClusterInSite->clusterHash();
249  determinedClusterInSite->refreshId();
250  determinedClusterInSite->updateScoreInSite();
251  detScore = determinedClusterInSite->getScoreInSite();
252  }
253  }
254 }
255 
257 {
258 
259  std::set<int> hashIdSet;
260  hashIdSet.clear();
261  for (auto tmpSeedCluster : seedClusters)
262  {
263  hashIdSet.insert(tmpSeedCluster->getHash());
264  }
265  for (auto tmpSeedCluster : seedClusters)
266  {
267  for (auto tmpPU : neighborPUs)
268  {
269  // if (hashIdSet.find(tmpSeedCluster->clusterHashWithAdditionalPU(tmpPU)) != hashIdSet.end())
270  // continue;
271  PackingCLBCluster *tmpCluster = new PackingCLBCluster(tmpSeedCluster);
272 
273  if (tmpCluster->addPU(tmpPU))
274  {
275  tmpCluster->refreshId();
276  tmpCluster->incrementalUpdateScoreInSite(tmpPU);
277  tmpCluster->clusterHash();
278  if (hashIdSet.find(tmpCluster->getHash()) != hashIdSet.end())
279  {
280  delete tmpCluster;
281  tmpCluster = nullptr;
282  }
283  else
284  {
285  hashIdSet.insert(tmpCluster->getHash());
286  priorityQueue.push_back(tmpCluster);
287  }
288  }
289  else
290  {
291  // if (tmpSeedCluster->getPUs().size() == 0 && !isCarrySite && !isLUTRAMSite &&
292  // (tmpSeedCluster->isPUTypeCompatibleWithSiteType(tmpPU)))
293  // {
294  // PackingCLBCluster *tmpCluster = new PackingCLBCluster(tmpSeedCluster);
295  // tmpCluster->addPUFailReason(tmpPU);
296  // delete tmpCluster;
297  // std::cout.flush();
298  // assert(false);
299  // }
300  // assert(tmpCluster->getPUs().size() || isCarrySite || isLUTRAMSite ||
301  // !(tmpSeedCluster->isPUTypeCompatibleWithSiteType(tmpPU)));
302  delete tmpCluster;
303  tmpCluster = nullptr;
304  }
305  }
306  }
307 }
308 
309 std::set<PlacementInfo::PlacementUnit *, Packing_PUcompare> *
311  DesignInfo::DesignCellType curCellType, float targetX, float targetY, float displacementLowerbound,
312  float displacementUpperbound, int PUNumThreshold, const std::vector<PackingCLBSite *> &PUId2PackingCLBSite,
313  float y2xRatio, std::set<PlacementInfo::PlacementUnit *, Packing_PUcompare> *res, bool clockRegionAware)
314 {
315  assert(displacementLowerbound < displacementUpperbound);
316  // please note that the input DesignCell is only used to find the corresponding binGrid for site search.
317  if (!res)
318  {
319  std::set<PlacementInfo::PlacementUnit *, Packing_PUcompare> *res =
320  new std::set<PlacementInfo::PlacementUnit *, Packing_PUcompare>();
321  res->clear();
322  }
323 
324  int binIdX, binIdY, clockRegionX, clockRegionY;
325  placementInfo->getGridXY(targetX, targetY, binIdX, binIdY);
326  placementInfo->getDeviceInfo()->getClockRegionByLocation(targetX, targetY, clockRegionX, clockRegionY);
327 
328  auto sharedTypeIds = placementInfo->getPotentialBELTypeIDs(curCellType);
329 
330  for (auto sharedTypeId : sharedTypeIds)
331  {
332  std::vector<std::vector<PlacementInfo::PlacementBinInfo *>> &curBinGrid =
333  placementInfo->getBinGrid(sharedTypeId);
334  assert(binIdY >= 0);
335  assert((unsigned int)binIdY < curBinGrid.size());
336  assert(binIdX >= 0);
337  assert((unsigned int)binIdX < curBinGrid[binIdY].size());
338 
339  std::queue<std::pair<int, int>> binXYqueue;
340  std::set<std::pair<int, int>> reachedBinXYs;
341  binXYqueue.emplace(binIdX, binIdY);
342  reachedBinXYs.emplace(binIdX, binIdY);
343 
344  while (binXYqueue.size() > 0)
345  {
346  std::pair<int, int> curXY = binXYqueue.front();
347  binXYqueue.pop();
348  int curbinIdX = curXY.first, curbinIdY = curXY.second;
349 
350  PlacementInfo::PlacementBinInfo *curBin = curBinGrid[curbinIdY][curbinIdX];
351  float bin2TargetXYDistance = curBin->getManhattanDistanceTo(targetX, targetY);
352  if (bin2TargetXYDistance > displacementUpperbound)
353  continue;
354  for (auto tmpCell : curBin->getCells())
355  {
356  if (tmpCell->isLUT() || tmpCell->isFF())
357  {
358  auto tmpPU = placementInfo->getPlacementUnitByCell(tmpCell);
359 
360  if (tmpPU->isFixed() || tmpPU->isPacked())
361  continue;
362  if (auto tmpMacro = dynamic_cast<PlacementInfo::PlacementMacro *>(tmpPU))
363  {
364  if (tmpMacro->getMacroType() == PlacementInfo::PlacementMacro::PlacementMacroType_CARRY ||
365  tmpMacro->getMacroType() == PlacementInfo::PlacementMacro::PlacementMacroType_MCLB)
366  {
367  continue;
368  }
369  }
370  if (!PUId2PackingCLBSite[tmpPU->getId()])
371  {
372  float tmpPUDis = fabs(targetX - tmpPU->X()) + y2xRatio * fabs(targetY - tmpPU->Y());
373  if (tmpPUDis > displacementLowerbound && tmpPUDis <= displacementUpperbound)
374  {
375  int PUClockRegionX, PUClockRegionY;
376  placementInfo->getDeviceInfo()->getClockRegionByLocation(tmpPU->X(), targetY,
377  PUClockRegionX, PUClockRegionY);
378  if (PUClockRegionX == clockRegionX)
379  res->insert(tmpPU);
380  }
381  }
382  else
383  {
384  if (PUId2PackingCLBSite[tmpPU->getId()] == this)
385  {
386  float tmpPUDis = fabs(targetX - tmpPU->X()) + y2xRatio * fabs(targetY - tmpPU->Y());
387  if (tmpPUDis > displacementLowerbound && tmpPUDis <= displacementUpperbound)
388  {
389  int PUClockRegionX, PUClockRegionY;
391  tmpPU->X(), targetY, PUClockRegionX, PUClockRegionY);
392  if (PUClockRegionX == clockRegionX)
393  res->insert(tmpPU);
394  }
395  }
396  }
397  }
398  }
399 
400  // if (res->size() < (unsigned int)PUNumThreshold)
401  {
402  for (int nextY = curbinIdY - 1; nextY <= curbinIdY + 1; nextY++)
403  {
404  for (int nextX = curbinIdX - 1; nextX <= curbinIdX + 1; nextX++)
405  {
406  if (!(nextY >= 0))
407  continue;
408  if (!((unsigned int)nextY < curBinGrid.size()))
409  continue;
410  if (!(nextX >= 0))
411  continue;
412  if (!((unsigned int)nextX < curBinGrid[binIdY].size()))
413  continue;
414  PlacementInfo::PlacementBinInfo *nextBin = curBinGrid[nextY][nextX];
415  float nextBin2TargetXYDistance = nextBin->getManhattanDistanceTo(targetX, targetY);
416  if (nextBin2TargetXYDistance > displacementUpperbound)
417  continue;
418  std::pair<int, int> nextXY(nextX, nextY);
419  if (reachedBinXYs.find(nextXY) == reachedBinXYs.end())
420  {
421  reachedBinXYs.insert(nextXY);
422  binXYqueue.push(nextXY);
423  }
424  }
425  }
426  }
427  }
428  }
429 
430  return res;
431 }
432 
433 // Node-centric DL algorithm flow at each computation node
434 void ParallelCLBPacker::PackingCLBSite::updateStep(bool initial, bool debug)
435 {
436  if (debug)
437  setDebug();
438  removeInvalidClustersFromPQ();
439  removeClustersIncompatibleWithDetClusterFromPQ();
440  removeInvalidPUsFromNeighborPUs();
441  updateConsistentPUsInTop(); // the PQ top might be kept updated but some of its PUs might be consistent
442 
443  seedClusters = priorityQueue;
444  if (seedClusters.size() == 0)
445  {
446  if (determinedClusterInSite)
447  {
448  seedClusters.push_back(new PackingCLBCluster(determinedClusterInSite));
449  }
450  else
451  {
452  seedClusters.push_back(new PackingCLBCluster(this));
453  }
454  seedClusters[0]->clusterHash();
455  }
456 
457  if (neighborPUs.size() < numNeighbor && curD < maxD)
458  {
459  if (initial)
460  {
461  findNeiborPUsFromBinGrid(DesignInfo::CellType_LUT6, CLBSite->X(), CLBSite->Y(), 0, curD, numNeighbor,
462  PUId2PackingCLBSite, y2xRatio, &neighborPUs);
463  findNeiborPUsFromBinGrid(DesignInfo::CellType_FDCE, CLBSite->X(), CLBSite->Y(), 0, curD, numNeighbor,
464  PUId2PackingCLBSite, y2xRatio, &neighborPUs);
465  }
466  else
467  {
468  float newD = std::min(curD + deltaD, maxD);
469  findNeiborPUsFromBinGrid(DesignInfo::CellType_LUT6, CLBSite->X(), CLBSite->Y(), curD, newD, numNeighbor,
470  PUId2PackingCLBSite, y2xRatio, &neighborPUs);
471  findNeiborPUsFromBinGrid(DesignInfo::CellType_FDCE, CLBSite->X(), CLBSite->Y(), curD, newD, numNeighbor,
472  PUId2PackingCLBSite, y2xRatio, &neighborPUs);
473  curD = newD;
474  }
475  }
476 
477  findNewClustersWithNeighborPUs();
478 
479  refreshPrioryQueue();
480 
481  // if (CLBSite->getName() == "SLICE_X38Y213")
482  // {
483  // std::ofstream debugFile;
484  // debugFile.open("OpenPitonSLICE_SLICE_X38Y213", std::ios_base::app);
485  // assert(debugFile.is_open() && debugFile.good() &&
486  // "The path for placement Tcl dumping does not exist and please check your path settings");
487 
488  // debugFile << "####################################SLICE_X38Y213######################################\n";
489  // for (unsigned int i = 0; i < priorityQueue.size(); i++)
490  // {
491  // debugFile << priorityQueue[i] << "\n";
492  // }
493  // debugFile.close();
494  // }
495  // if (CLBSite->getName() == "SLICE_X38Y215")
496  // {
497  // std::ofstream debugFile;
498  // debugFile.open("OpenPitonSLICE_SLICE_X38Y213", std::ios_base::app);
499  // assert(debugFile.is_open() && debugFile.good() &&
500  // "The path for placement Tcl dumping does not exist and please check your path settings");
501 
502  // debugFile << "####################################SLICE_X38Y215######################################\n";
503  // for (unsigned int i = 0; i < priorityQueue.size(); i++)
504  // {
505  // debugFile << priorityQueue[i] << "\n";
506  // }
507  // debugFile.close();
508  // }
509  if (priorityQueue.size() > PQSize)
510  {
511  int finalPQSize = PQSize;
512  float scoreThreshold = priorityQueue[PQSize - 1]->getScoreInSite() * 0.99;
513  bool extendable = true;
514  for (unsigned int i = PQSize; i < priorityQueue.size(); i++)
515  {
516  if (extendable && priorityQueue[i]->getScoreInSite() > scoreThreshold)
517  {
518  finalPQSize = i + 1;
519  if (finalPQSize > 1.5 * PQSize)
520  {
521  extendable = false;
522  }
523  }
524  else
525  {
526  extendable = false;
527  assert(priorityQueue[i]);
528  delete priorityQueue[i];
529  priorityQueue[i] = nullptr;
530  }
531  }
532  priorityQueue.resize(finalPQSize);
533  }
534 }
535 
537 {
 // NOTE(review): the signature line is missing from this listing; the body takes a pointer named
 // `cell` and is presumably the isLUT6() helper called from the slot-mapping code -- confirm.
 // A null cell is reported as false.
538  if (!cell)
539  return false;
540  return (cell->getOriCellType() == DesignInfo::CellType_LUT6) ||
 // NOTE(review): listing line 541, the right-hand operand of the `||` above, is missing from
 // this extract; restore it from the original file before building.
542 }
543 
545 {
546  assert(checkIsPrePackedSite() && checkIsCarrySite());
547  assert(determinedClusterInSite->getSingleLUTs().size() + determinedClusterInSite->getPairedLUTs().size() <= 8);
548 
549  for (int i = 0; i < 24; i++)
550  finalMapToSlotsForCarrySite(i);
551 
552  for (int targetFFSetId = 0; targetFFSetId < 4; targetFFSetId++)
553  {
554  if (determinedClusterInSite->evictFFsFromCarryHalfCLB(targetFFSetId))
555  {
556  for (int i = 0; i < 24; i++)
557  finalMapToSlotsForCarrySite(i);
558  determinedClusterInSite->recoverFFControlSets();
559  }
560  }
561 
562  slotMapping = best_SlotMapping;
563  mappedCells = best_mappedCells;
564  mappedLUTs = best_mappedLUTs;
565  mappedFFs = best_mappedFFs;
566  moveLUTToLUT6Slot();
567 }
568 
570 {
571  assert(isCarrySite);
572 
573  int FFSwapOption[24][4] = {{0, 1, 2, 3}, {0, 1, 3, 2}, {0, 2, 1, 3}, {0, 2, 3, 1}, {0, 3, 1, 2}, {0, 3, 2, 1},
574  {1, 0, 2, 3}, {1, 0, 3, 2}, {1, 2, 0, 3}, {1, 2, 3, 0}, {1, 3, 0, 2}, {1, 3, 2, 0},
575  {2, 0, 1, 3}, {2, 0, 3, 1}, {2, 1, 0, 3}, {2, 1, 3, 0}, {2, 3, 0, 1}, {2, 3, 1, 0},
576  {3, 0, 1, 2}, {3, 0, 2, 1}, {3, 1, 0, 2}, {3, 1, 2, 0}, {3, 2, 0, 1}, {3, 2, 1, 0}};
577 
578  slotMapping = SiteBELMapping();
579  mappedCells.clear();
580  mappedLUTs.clear();
581  mappedFFs.clear();
582  mapCarryRelatedCellsToSlots(CARRYChain, CARRYChainSiteOffset);
583  std::map<DesignInfo::DesignCell *, DesignInfo::DesignCell *> FF2LUT;
584  std::map<DesignInfo::DesignCell *, DesignInfo::DesignCell *> LUT2FF;
585  auto singleLUTs = determinedClusterInSite->getSortedSingleLUTs();
586  auto pairedLUTs = determinedClusterInSite->getSortedPairedLUTs();
587 
588  for (int i = 0; i < 2; i++)
589  {
590  for (int j = 0; j < 2; j++)
591  {
592  int halfCLBId = i * 2 + j;
593  auto &CSFF = determinedClusterInSite->getFFControlSets()[FFSwapOption[FFControlSetOrderId][halfCLBId]];
594  int anotherHalfCLBId = i * 2 + (1 - j);
595  auto &anotherCSFF =
596  determinedClusterInSite->getFFControlSets()[FFSwapOption[FFControlSetOrderId][anotherHalfCLBId]];
597  if (!compatibleInOneHalfCLB(FFSwapOption[FFControlSetOrderId][halfCLBId],
598  FFSwapOption[FFControlSetOrderId][anotherHalfCLBId]))
599  {
600  return;
601  }
602  if (halfCLBId != FFSwapOption[FFControlSetOrderId][halfCLBId])
603  {
604  for (auto FF : CSFF.getFFs())
605  {
607  {
608  return;
609  }
610  }
611  }
612  if (anotherHalfCLBId != FFSwapOption[FFControlSetOrderId][anotherHalfCLBId])
613  {
614  for (auto FF : anotherCSFF.getFFs())
615  {
617  {
618  return;
619  }
620  }
621  }
622  }
623  }
624 
625  // mapped FFs
626  for (int i = 0; i < 2; i++)
627  {
628  for (int j = 0; j < 2; j++)
629  {
630  for (int k = 0; k < 4; k++)
631  {
632  if (!slotMapping.FFs[i][j][k])
633  {
634  int halfCLBId = i * 2 + j;
635  auto &CSFF =
636  determinedClusterInSite->getFFControlSets()[FFSwapOption[FFControlSetOrderId][halfCLBId]];
637 
638  for (auto tmpFF : CSFF.getFFs())
639  {
640  if (mappedFFs.find(tmpFF) == mappedFFs.end())
641  {
642  mappedFFs.insert(tmpFF);
643  mappedCells.insert(tmpFF);
644  slotMapping.FFs[i][j][k] = tmpFF;
645  break;
646  }
647  }
648  }
649  }
650  }
651  }
652 
653  for (auto &FFSet : determinedClusterInSite->getFFControlSets())
654  {
655  for (auto curFF : FFSet.getFFs())
656  {
657  PlacementInfo::PlacementMacro *pairMacro =
659  if (pairMacro)
660  {
662  {
663  assert(pairMacro->getCells().size() == 2);
664  assert(pairMacro->getCells()[0]->isLUT());
665  assert(pairMacro->getCells()[1]->isFF());
666  FF2LUT[pairMacro->getCells()[1]] = pairMacro->getCells()[0];
667  LUT2FF[pairMacro->getCells()[0]] = pairMacro->getCells()[1];
668  }
669  }
670  }
671  }
672 
673  assert(fixedPairedLUTs.size() + determinedClusterInSite->getSingleLUTs().size() +
674  determinedClusterInSite->getPairedLUTs().size() <=
675  8);
676  // map LUT connected to carry and their paired LUT
677  for (int i = 0; i < 2; i++)
678  {
679  for (int j = 0; j < 2; j++)
680  {
681  for (int k = 0; k < 4; k++)
682  {
683  if (!slotMapping.LUTs[i][j][k])
684  {
685  if (slotMapping.LUTs[i][1 - j][k])
686  {
687  if (conflictLUTsContain(slotMapping.LUTs[i][1 - j][k]))
688  {
689  for (auto pair : pairedLUTs)
690  {
691  if (pair.first == slotMapping.LUTs[i][1 - j][k])
692  {
693  slotMapping.LUTs[i][1 - j][k] = pair.first;
694  slotMapping.LUTs[i][j][k] = pair.second;
695  mappedLUTs.insert(slotMapping.LUTs[i][j][k]);
696  mappedLUTs.insert(slotMapping.LUTs[i][1 - j][k]);
697  mappedCells.insert(slotMapping.LUTs[i][j][k]);
698  mappedCells.insert(slotMapping.LUTs[i][1 - j][k]);
699  break;
700  }
701  if (pair.second == slotMapping.LUTs[i][1 - j][k])
702  {
703  slotMapping.LUTs[i][1 - j][k] = pair.second;
704  slotMapping.LUTs[i][j][k] = pair.first;
705  mappedLUTs.insert(slotMapping.LUTs[i][j][k]);
706  mappedLUTs.insert(slotMapping.LUTs[i][1 - j][k]);
707  mappedCells.insert(slotMapping.LUTs[i][j][k]);
708  mappedCells.insert(slotMapping.LUTs[i][1 - j][k]);
709  break;
710  }
711  }
712  }
713  }
714  }
715  }
716  }
717  }
718 
719  // mapped paired LUTs
720  for (int i = 0; i < 2; i++)
721  {
722  for (int k = 0; k < 4; k++)
723  {
724  if (!slotMapping.LUTs[i][0][k] && !slotMapping.LUTs[i][1][k])
725  {
726  for (auto pair : pairedLUTs)
727  {
728  if (mappedLUTs.find(pair.first) == mappedLUTs.end())
729  {
730  if (mappedLUTs.find(pair.second) != mappedLUTs.end())
731  {
732  for (int i = 0; i < 2; i++)
733  {
734  for (int j = 0; j < 2; j++)
735  {
736  for (int k = 0; k < 4; k++)
737  {
738  if (slotMapping.LUTs[i][j][k])
739  {
740  std::cout << "i,j,k:" << i << "," << j << "," << k << " "
741  << slotMapping.LUTs[i][j][k] << "\n";
742  }
743  }
744  }
745  }
746  determinedClusterInSite->printMyself();
747  std::cout.flush();
748  }
749  assert(mappedLUTs.find(pair.second) == mappedLUTs.end());
750  slotMapping.LUTs[i][0][k] = pair.first;
751  slotMapping.LUTs[i][1][k] = pair.second;
752  mappedLUTs.insert(slotMapping.LUTs[i][0][k]);
753  mappedLUTs.insert(slotMapping.LUTs[i][1][k]);
754  mappedCells.insert(slotMapping.LUTs[i][0][k]);
755  mappedCells.insert(slotMapping.LUTs[i][1][k]);
756  break;
757  }
758  }
759  }
760  }
761  }
762 
763  // mapped single LUTs
764  for (int i = 0; i < 2; i++)
765  {
766  for (int k = 0; k < 4; k++)
767  {
768  if (!slotMapping.LUTs[i][0][k] && !slotMapping.LUTs[i][1][k])
769  {
770  for (auto tmpLUT : singleLUTs)
771  {
772  if (mappedLUTs.find(tmpLUT) == mappedLUTs.end())
773  {
774  assert(mappedLUTs.find(tmpLUT) == mappedLUTs.end());
775  slotMapping.LUTs[i][0][k] = tmpLUT;
776  mappedLUTs.insert(tmpLUT);
777  mappedCells.insert(tmpLUT);
778  break;
779  }
780  }
781  }
782  }
783  }
784 
785  assert(fixedPairedLUTs.size() + determinedClusterInSite->getSingleLUTs().size() +
786  determinedClusterInSite->getPairedLUTs().size() <=
787  8);
788 
789  int LUTSwapOptions[4][4] = {{0, 1, 0, 1}, {1, 0, 0, 1}, {1, 0, 1, 0}, {0, 1, 1, 0}};
790 
791  for (int i0 = 0; i0 < 2; i0++)
792  {
793  for (int k0 = 0; k0 < 4; k0++)
794  {
795  for (int i1 = 0; i1 < 2; i1++)
796  {
797  for (int k1 = 0; k1 < 4; k1++)
798  {
799  if (i0 == i1 && k0 == k1)
800  {
801  continue;
802  }
803  if ((isCarryMacro(slotMapping.LUTs[i0][0][k0]) || isCarryMacro(slotMapping.FFs[i0][0][k0]) ||
804  isCarryMacro(slotMapping.LUTs[i0][1][k0]) || isCarryMacro(slotMapping.FFs[i0][1][k0]) ||
805  isCarryMacro(slotMapping.LUTs[i1][0][k1]) || isCarryMacro(slotMapping.FFs[i1][0][k1]) ||
806  isCarryMacro(slotMapping.LUTs[i1][1][k1]) || isCarryMacro(slotMapping.FFs[i1][1][k1])))
807  continue;
808  float oriDirectInternalRoute_slack =
809  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i0][0][k0], slotMapping.FFs[i0][0][k0]) +
810  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i0][1][k0], slotMapping.FFs[i0][1][k0]) +
811  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i1][0][k1], slotMapping.FFs[i1][0][k1]) +
812  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i1][1][k1], slotMapping.FFs[i1][1][k1]);
813 
814  float optDirectInternalRoute_slack = -100000000;
815  int optimalOption = -1;
816  for (int optionId = 0; optionId < 4; optionId++)
817  {
818  // switch locations
819  float newDirectInternalRoute_slack =
820  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i1][LUTSwapOptions[optionId][0]][k1],
821  slotMapping.FFs[i0][0][k0]) +
822  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i1][LUTSwapOptions[optionId][1]][k1],
823  slotMapping.FFs[i0][1][k0]) +
824  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i0][LUTSwapOptions[optionId][2]][k0],
825  slotMapping.FFs[i1][0][k1]) +
826  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i0][LUTSwapOptions[optionId][3]][k0],
827  slotMapping.FFs[i1][1][k1]);
828  if (isLUT6(slotMapping.LUTs[i1][LUTSwapOptions[optionId][1]][k1]) ||
829  isLUT6(slotMapping
830  .LUTs[i0][LUTSwapOptions[optionId][3]][k0])) // illegal to put LUT6 at LUT5 slot
831  continue;
832  if (newDirectInternalRoute_slack > optDirectInternalRoute_slack)
833  {
834  optimalOption = optionId;
835  optDirectInternalRoute_slack = newDirectInternalRoute_slack;
836  }
837  }
838 
839  if (oriDirectInternalRoute_slack < optDirectInternalRoute_slack)
840  {
841  DesignInfo::DesignCell *tmpLUT0 = slotMapping.LUTs[i1][LUTSwapOptions[optimalOption][0]][k1];
842  DesignInfo::DesignCell *tmpLUT1 = slotMapping.LUTs[i1][LUTSwapOptions[optimalOption][1]][k1];
843  DesignInfo::DesignCell *tmpLUT2 = slotMapping.LUTs[i0][LUTSwapOptions[optimalOption][2]][k0];
844  DesignInfo::DesignCell *tmpLUT3 = slotMapping.LUTs[i0][LUTSwapOptions[optimalOption][3]][k0];
845  slotMapping.LUTs[i0][0][k0] = tmpLUT0;
846  assert(!isLUT6(tmpLUT1));
847  slotMapping.LUTs[i0][1][k0] = tmpLUT1;
848  slotMapping.LUTs[i1][0][k1] = tmpLUT2;
849  assert(!isLUT6(tmpLUT3));
850  slotMapping.LUTs[i1][1][k1] = tmpLUT3;
851  continue;
852  }
853  }
854  }
855  }
856  }
857 
858  for (int i = 0; i < 2; i++)
859  {
860  for (int j = 0; j < 2; j++)
861  {
862  // if ((isCarryMacro(slotMapping.FFs[i][j][0]) || isCarryMacro(slotMapping.FFs[i][j][1]) ||
863  // isCarryMacro(slotMapping.FFs[i][j][2]) || isCarryMacro(slotMapping.FFs[i][j][3])))
864  // continue;
865  float oriDirectInternalRoute_slack =
866  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][0], slotMapping.FFs[i][j][0]) +
867  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][1], slotMapping.FFs[i][j][1]) +
868  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][2], slotMapping.FFs[i][j][2]) +
869  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][3], slotMapping.FFs[i][j][3]);
870  float optDirectInternalRoute_slack = -100000000;
871  int optimalOption = -1;
872  for (int optionId = 0; optionId < 24; optionId++)
873  {
874  bool moveFFofCarry = false;
875  for (int checkFFId = 0; checkFFId < 4; checkFFId++)
876  {
877  if (FFSwapOption[optionId][checkFFId] != checkFFId)
878  {
879  if (isCarryMacro(slotMapping.FFs[i][j][checkFFId]))
880  {
881  moveFFofCarry = true;
882  }
883  }
884  }
885  if (moveFFofCarry)
886  continue;
887 
888  float newDirectInternalRoute_slack =
889  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][0],
890  slotMapping.FFs[i][j][FFSwapOption[optionId][0]]) +
891  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][1],
892  slotMapping.FFs[i][j][FFSwapOption[optionId][1]]) +
893  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][2],
894  slotMapping.FFs[i][j][FFSwapOption[optionId][2]]) +
895  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][3],
896  slotMapping.FFs[i][j][FFSwapOption[optionId][3]]);
897  if (newDirectInternalRoute_slack > optDirectInternalRoute_slack)
898  {
899  optimalOption = optionId;
900  optDirectInternalRoute_slack = newDirectInternalRoute_slack;
901  }
902  }
903  if (oriDirectInternalRoute_slack < optDirectInternalRoute_slack)
904  {
905  DesignInfo::DesignCell *tmpFF0 = slotMapping.FFs[i][j][FFSwapOption[optimalOption][0]];
906  DesignInfo::DesignCell *tmpFF1 = slotMapping.FFs[i][j][FFSwapOption[optimalOption][1]];
907  DesignInfo::DesignCell *tmpFF2 = slotMapping.FFs[i][j][FFSwapOption[optimalOption][2]];
908  DesignInfo::DesignCell *tmpFF3 = slotMapping.FFs[i][j][FFSwapOption[optimalOption][3]];
909  slotMapping.FFs[i][j][0] = tmpFF0;
910  slotMapping.FFs[i][j][1] = tmpFF1;
911  slotMapping.FFs[i][j][2] = tmpFF2;
912  slotMapping.FFs[i][j][3] = tmpFF3;
913  continue;
914  }
915  }
916  }
917 
918  if (determinedClusterInSite)
919  {
920 
921  unsigned int FFCnt = 0;
922  for (auto &CSFF : determinedClusterInSite->getFFControlSets())
923  FFCnt += CSFF.getFFs().size();
924 
925  if (FFCnt != mappedFFs.size())
926  {
927  std::cout << "FFCnt: " << FFCnt << " mappedFFs.size():" << mappedFFs.size() << "\n";
928  determinedClusterInSite->printMyself();
929  for (int i = 0; i < 2; i++)
930  {
931  for (int k = 0; k < 4; k++)
932  {
933  for (int j = 0; j < 2; j++)
934  {
935  std::cout << "i,k,j:" << i << "," << k << "," << j << ":\n";
936  if (slotMapping.LUTs[i][j][k])
937  std::cout << slotMapping.LUTs[i][j][k] << "\n";
938  if (slotMapping.FFs[i][j][k])
939  std::cout << slotMapping.FFs[i][j][k] << "\n";
940  }
941  }
942  }
943  assert(FFCnt == mappedFFs.size());
944  }
945  }
946 
947  std::map<DesignInfo::DesignCell *, std::array<int, 3>> cell2slot;
948  for (int i = 0; i < 2; i++)
949  {
950  for (int j = 0; j < 2; j++)
951  {
952  for (int k = 0; k < 4; k++)
953  {
954  if (slotMapping.FFs[i][j][k])
955  {
956  cell2slot[slotMapping.FFs[i][j][k]] = std::array<int, 3>({i, j, k});
957  }
958  if (slotMapping.LUTs[i][j][k])
959  {
960  cell2slot[slotMapping.LUTs[i][j][k]] = std::array<int, 3>({i, j, k});
961  }
962  }
963  }
964  }
965  // Move FF to empty slot if possible to increase direct connect
966  for (int i0 = 0; i0 < 2; i0++)
967  {
968  for (int j0 = 0; j0 < 2; j0++)
969  {
970  for (int k0 = 0; k0 < 4; k0++)
971  {
972  if (!slotMapping.LUTs[i0][j0][k0])
973  continue;
974  if (LUT2FF.find(slotMapping.LUTs[i0][j0][k0]) != LUT2FF.end())
975  {
976  auto targetFF = LUT2FF[slotMapping.LUTs[i0][j0][k0]];
977  if (slotMapping.FFs[i0][j0][k0] == nullptr)
978  {
979  int halfCLBId0 = i0 * 2 + j0;
980  auto &CSFF0 =
981  determinedClusterInSite->getFFControlSets()[FFSwapOption[FFControlSetOrderId][halfCLBId0]];
982  int i1 = cell2slot[targetFF].at(0);
983  int j1 = cell2slot[targetFF].at(1);
984  int k1 = cell2slot[targetFF].at(2);
985  int halfCLBId1 = i1 * 2 + j1;
986  auto &CSFF1 =
987  determinedClusterInSite->getFFControlSets()[FFSwapOption[FFControlSetOrderId][halfCLBId1]];
988  if (CSFF0.compatibleWith(CSFF1.getCSId()))
989  {
990  if (FFSwapOption[FFControlSetOrderId][halfCLBId1] !=
991  FFSwapOption[FFControlSetOrderId][halfCLBId0])
992  determinedClusterInSite->moveFFFromCS1ToCS0(
993  targetFF, FFSwapOption[FFControlSetOrderId][halfCLBId1],
994  FFSwapOption[FFControlSetOrderId][halfCLBId0]);
995  slotMapping.FFs[i0][j0][k0] = targetFF;
996  slotMapping.FFs[i1][j1][k1] = nullptr;
997  cell2slot[targetFF] = std::array<int, 3>({i0, j0, k0});
998  }
999  }
1000  }
1001  }
1002  }
1003  }
1004 
1005  // Move LUT-FF to empty slot if possible to increase direct connect
1006  for (int i0 = 0; i0 < 2; i0++)
1007  {
1008  for (int j0 = 0; j0 < 2; j0++)
1009  {
1010  for (int k0 = 0; k0 < 4; k0++)
1011  {
1012  if (!slotMapping.LUTs[i0][j0][k0])
1013  continue;
1014  auto targetLUT = slotMapping.LUTs[i0][j0][k0];
1015  if (slotMapping.LUTs[i0][1 - j0][k0])
1016  continue;
1017  if (LUT2FF.find(targetLUT) != LUT2FF.end())
1018  {
1019  auto targetFF = LUT2FF[targetLUT];
1020  int i1 = cell2slot[targetFF].at(0);
1021  int j1 = cell2slot[targetFF].at(1);
1022  int k1 = cell2slot[targetFF].at(2);
1023  int halfCLBId1 = i1 * 2 + j1;
1024 
1025  if (i0 != i1 || j0 != j1 || k0 != k1)
1026  {
1027  bool optimized = false;
1028  for (int ii = 0; ii < 2 && !optimized; ii++)
1029  {
1030  for (int jj = 0; jj < 2 && !optimized; jj++)
1031  {
1032  bool compatible = true;
1033  if (targetLUT->isLUT6() && jj == 1)
1034  continue;
1035  int halfCLBId0 = ii * 2 + jj;
1036  for (int kk = 0; kk < 4; kk++)
1037  {
1038  if (slotMapping.FFs[ii][jj][kk])
1039  {
1040  if (slotMapping.FFs[ii][jj][kk]->getControlSetInfo())
1041  {
1042  if (slotMapping.FFs[ii][jj][kk]->getControlSetInfo()->getId() !=
1043  targetFF->getControlSetInfo()->getId())
1044  {
1045  compatible = false;
1046  }
1047  }
1048  }
1049  }
1050  for (int kk = 0; kk < 4; kk++)
1051  {
1052  if (slotMapping.FFs[ii][1 - jj][kk])
1053  {
1054  if (slotMapping.FFs[ii][1 - jj][kk]->getControlSetInfo())
1055  {
1056  if (slotMapping.FFs[ii][1 - jj][kk]->getControlSetInfo()->getCLK() !=
1057  targetFF->getControlSetInfo()->getCLK() ||
1058  slotMapping.FFs[ii][1 - jj][kk]->getControlSetInfo()->getSR() !=
1059  targetFF->getControlSetInfo()->getSR())
1060  {
1061  compatible = false;
1062  }
1063  }
1064  }
1065  }
1066 
1067  if (compatible)
1068  {
1069  for (int kk = 0; kk < 4; kk++)
1070  {
1071  if (!slotMapping.LUTs[ii][jj][kk] && !slotMapping.FFs[ii][jj][kk])
1072  {
1073  // if (slotMapping.LUTs[ii][1 - jj][kk])
1074  // continue;
1075  if (targetLUT->isLUT6())
1076  {
1077  if (slotMapping.LUTs[ii][1 - jj][kk])
1078  continue;
1079  }
1080  else
1081  {
1082  if (slotMapping.LUTs[ii][1 - jj][kk] &&
1083  slotMapping.LUTs[ii][1 - jj][kk]->isLUT6())
1084  continue;
1085  if (slotMapping.LUTs[ii][1 - jj][kk])
1086  {
1087  int pairPinNum =
1088  getPairPinNum(targetLUT, slotMapping.LUTs[ii][1 - jj][kk]);
1089  if (pairPinNum > 5)
1090  continue;
1091  }
1092  }
1093 
1094  slotMapping.LUTs[i0][j0][k0] = nullptr;
1095  slotMapping.FFs[i1][j1][k1] = nullptr;
1096  assert(!slotMapping.LUTs[ii][jj][kk]);
1097  assert(!slotMapping.FFs[ii][jj][kk]);
1098  slotMapping.LUTs[ii][jj][kk] = targetLUT;
1099  slotMapping.FFs[ii][jj][kk] = targetFF;
1100 
1101  if (FFSwapOption[FFControlSetOrderId][halfCLBId1] !=
1102  FFSwapOption[FFControlSetOrderId][halfCLBId0])
1103  determinedClusterInSite->moveFFFromCS1ToCS0(
1104  targetFF, FFSwapOption[FFControlSetOrderId][halfCLBId1],
1105  FFSwapOption[FFControlSetOrderId][halfCLBId0]);
1106  cell2slot[targetFF] = std::array<int, 3>({ii, jj, kk});
1107  cell2slot[targetLUT] = std::array<int, 3>({ii, jj, kk});
1108  optimized = true;
1109  break;
1110  }
1111  }
1112  }
1113  }
1114  }
1115  }
1116  }
1117  }
1118  }
1119  }
1120 
1121  auto &timingNodes = placementInfo->getTimingInfo()->getSimplePlacementTimingInfo();
1123  float directConnectCnt = 0;
1124  for (int i = 0; i < 2; i++)
1125  {
1126  for (int j = 0; j < 2; j++)
1127  {
1128  for (int k = 0; k < 4; k++)
1129  {
1130  if (checkDirectLUTFFConnect(FF2LUT, slotMapping.LUTs[i][j][k], slotMapping.FFs[i][j][k]))
1131  {
1132  auto srcCell = slotMapping.LUTs[i][j][k];
1133  unsigned int srcCellId = srcCell->getCellId();
1134  auto srcNode = timingNodes[srcCellId];
1135  // int succPathLen = srcNode->getLongestPathLength();
1136  if (srcNode->getForwardLevel() < 0)
1137  continue;
1138  float slack =
1139  (srcNode->getLatestInputArrival() - srcNode->getRequiredArrivalTime()) / clockPeriod + 20;
1140  directConnectCnt += slack;
1141  }
1142  }
1143  }
1144  }
1145  if (directConnectCnt > best_DirectConnect)
1146  {
1147  best_SlotMapping = slotMapping;
1148  best_mappedCells = mappedCells;
1149  best_mappedLUTs = mappedLUTs;
1150  best_mappedFFs = mappedFFs;
1151  best_DirectConnect = directConnectCnt;
1152  }
1153 }
1154 
// Builds one candidate LUT/FF-to-slot assignment for determinedClusterInSite, using the control-set
// ordering selected by FFControlSetOrderId (a row index into FFSwapOption), scores it, and copies it
// into best_SlotMapping / best_mapped* when the score beats best_DirectConnect.
// Slots are addressed as [i][j][k] with i in {0,1}, j in {0,1}, k in {0..3}; halfCLBId = i * 2 + j.
// The function returns early (leaving best_* untouched) when the chosen ordering is rejected.
// NOTE(review): the signature line (listing line 1155) is absent from this extract, so the function
// name and parameter list cannot be confirmed here; FFControlSetOrderId is presumably a parameter.
1156 {
// All 24 orderings of {0,1,2,3}. Row FFControlSetOrderId maps halfCLBId -> control-set index; the same
// table is reused further down to enumerate orderings of the four FF slots inside one half.
1157  int FFSwapOption[24][4] = {{0, 1, 2, 3}, {0, 1, 3, 2}, {0, 2, 1, 3}, {0, 2, 3, 1}, {0, 3, 1, 2}, {0, 3, 2, 1},
1158  {1, 0, 2, 3}, {1, 0, 3, 2}, {1, 2, 0, 3}, {1, 2, 3, 0}, {1, 3, 0, 2}, {1, 3, 2, 0},
1159  {2, 0, 1, 3}, {2, 0, 3, 1}, {2, 1, 0, 3}, {2, 1, 3, 0}, {2, 3, 0, 1}, {2, 3, 1, 0},
1160  {3, 0, 1, 2}, {3, 0, 2, 1}, {3, 1, 0, 2}, {3, 1, 2, 0}, {3, 2, 0, 1}, {3, 2, 1, 0}};
1161 
// Start from an empty mapping; every call rebuilds the candidate from scratch.
1162  slotMapping = SiteBELMapping();
1163  mappedCells.clear();
1164  mappedLUTs.clear();
1165  mappedFFs.clear();
1166  std::map<DesignInfo::DesignCell *, DesignInfo::DesignCell *> FF2LUT;
1167  std::map<DesignInfo::DesignCell *, DesignInfo::DesignCell *> LUT2FF;
1168  auto singleLUTs = determinedClusterInSite->getSortedSingleLUTs();
1169  auto pairedLUTs = determinedClusterInSite->getSortedPairedLUTs();
1170 
// Reject the ordering if the two control sets that would share the same i are not compatible.
1171  for (int i = 0; i < 2; i++)
1172  {
1173  for (int j = 0; j < 2; j++)
1174  {
1175  int halfCLBId = i * 2 + j;
1176  auto &CSFF = determinedClusterInSite->getFFControlSets()[FFSwapOption[FFControlSetOrderId][halfCLBId]];
1177  int anotherHalfCLBId = i * 2 + (1 - j);
1178  auto &anotherCSFF =
1179  determinedClusterInSite->getFFControlSets()[FFSwapOption[FFControlSetOrderId][anotherHalfCLBId]];
1180  if (!compatibleInOneHalfCLB(FFSwapOption[FFControlSetOrderId][halfCLBId],
1181  FFSwapOption[FFControlSetOrderId][anotherHalfCLBId]))
1182  {
1183  return;
1184  }
// Only control sets that the ordering actually relocates are inspected FF by FF.
1185  if (halfCLBId != FFSwapOption[FFControlSetOrderId][halfCLBId])
1186  {
1187  for (auto FF : CSFF.getFFs())
1188  {
// NOTE(review): listing line 1189 (presumably the condition guarding this return) is absent from this extract.
1190  {
1191  return;
1192  }
1193  }
1194  }
1195  if (anotherHalfCLBId != FFSwapOption[FFControlSetOrderId][anotherHalfCLBId])
1196  {
1197  for (auto FF : anotherCSFF.getFFs())
1198  {
// NOTE(review): listing line 1199 (presumably the condition guarding this return) is absent from this extract.
1200  {
1201  return;
1202  }
1203  }
1204  }
1205  }
1206  }
1207 
// Fill each empty FF slot with the first not-yet-mapped FF of the control set assigned to that half.
1208  // mapped FFs
1209  for (int i = 0; i < 2; i++)
1210  {
1211  for (int j = 0; j < 2; j++)
1212  {
1213  for (int k = 0; k < 4; k++)
1214  {
1215  if (!slotMapping.FFs[i][j][k])
1216  {
1217  int halfCLBId = i * 2 + j;
1218  auto &CSFF =
1219  determinedClusterInSite->getFFControlSets()[FFSwapOption[FFControlSetOrderId][halfCLBId]];
1220 
1221  for (auto tmpFF : CSFF.getFFs())
1222  {
1223  if (mappedFFs.find(tmpFF) == mappedFFs.end())
1224  {
1225  mappedFFs.insert(tmpFF);
1226  mappedCells.insert(tmpFF);
1227  slotMapping.FFs[i][j][k] = tmpFF;
1228  break;
1229  }
1230  }
1231  }
1232  }
1233  }
1234  }
1235 
// Record LUT<->FF partners taken from two-cell macros whose cells are (LUT, FF) in that order.
1236  // check directConnect
1237  for (auto &FFSet : determinedClusterInSite->getFFControlSets())
1238  {
1239  for (auto curFF : FFSet.getFFs())
1240  {
1241  PlacementInfo::PlacementMacro *pairMacro =
// NOTE(review): listing line 1242 (the initializer of pairMacro) is absent from this extract.
1243  if (pairMacro)
1244  {
// NOTE(review): listing line 1245 (presumably a condition selecting the macro kind) is absent from this extract.
1246  {
1247  assert(pairMacro->getCells().size() == 2);
1248  assert(pairMacro->getCells()[0]->isLUT());
1249  assert(pairMacro->getCells()[1]->isFF());
1250  FF2LUT[pairMacro->getCells()[1]] = pairMacro->getCells()[0];
1251  LUT2FF[pairMacro->getCells()[0]] = pairMacro->getCells()[1];
1252  }
1253  }
1254  }
1255  }
1256 
1257  assert(singleLUTs.size() + pairedLUTs.size() <= 8);
// For an empty LUT slot whose sibling slot [1 - j] is occupied, place the sibling's partner from pairedLUTs.
// NOTE(review): slotMapping was reset above and no LUT is placed before this loop in the visible code,
// so this pass looks like a no-op here unless SiteBELMapping() pre-populates LUTs - confirm.
1258  // map LUT connected to carry and their paired LUT
1259  for (int i = 0; i < 2; i++)
1260  {
1261  for (int j = 0; j < 2; j++)
1262  {
1263  for (int k = 0; k < 4; k++)
1264  {
1265  if (!slotMapping.LUTs[i][j][k])
1266  {
1267  if (slotMapping.LUTs[i][1 - j][k])
1268  {
1269  for (auto pair : pairedLUTs)
1270  {
1271  if (pair.first == slotMapping.LUTs[i][1 - j][k])
1272  {
1273  slotMapping.LUTs[i][1 - j][k] = pair.first;
1274  slotMapping.LUTs[i][j][k] = pair.second;
1275  mappedLUTs.insert(slotMapping.LUTs[i][j][k]);
1276  mappedLUTs.insert(slotMapping.LUTs[i][1 - j][k]);
1277  mappedCells.insert(slotMapping.LUTs[i][j][k]);
1278  mappedCells.insert(slotMapping.LUTs[i][1 - j][k]);
1279  break;
1280  }
1281  if (pair.second == slotMapping.LUTs[i][1 - j][k])
1282  {
1283  slotMapping.LUTs[i][1 - j][k] = pair.second;
1284  slotMapping.LUTs[i][j][k] = pair.first;
1285  mappedLUTs.insert(slotMapping.LUTs[i][j][k]);
1286  mappedLUTs.insert(slotMapping.LUTs[i][1 - j][k]);
1287  mappedCells.insert(slotMapping.LUTs[i][j][k]);
1288  mappedCells.insert(slotMapping.LUTs[i][1 - j][k]);
1289  break;
1290  }
1291  }
1292  }
1293  }
1294  }
1295  }
1296  }
1297 
// Each (i, k) position with both LUT slots free receives the first unmapped pair:
// pair.first goes to j == 0 and pair.second to j == 1.
1298  // mapped paired LUTs
1299  for (int i = 0; i < 2; i++)
1300  {
1301  for (int k = 0; k < 4; k++)
1302  {
1303  if (!slotMapping.LUTs[i][0][k] && !slotMapping.LUTs[i][1][k])
1304  {
1305  for (auto pair : pairedLUTs)
1306  {
1307  if (mappedLUTs.find(pair.first) == mappedLUTs.end())
1308  {
// Diagnostic dump printed just before the assertion below would fire (half-mapped pair).
// The inner loops declare their own i/j/k, which shadow the outer indices only inside this dump.
1309  if (mappedLUTs.find(pair.second) != mappedLUTs.end())
1310  {
1311  for (int i = 0; i < 2; i++)
1312  {
1313  for (int j = 0; j < 2; j++)
1314  {
1315  for (int k = 0; k < 4; k++)
1316  {
1317  if (slotMapping.LUTs[i][j][k])
1318  {
1319  std::cout << "i,j,k:" << i << "," << j << "," << k << " "
1320  << slotMapping.LUTs[i][j][k] << "\n";
1321  }
1322  }
1323  }
1324  }
1325  determinedClusterInSite->printMyself();
1326  std::cout.flush();
1327  }
1328  assert(mappedLUTs.find(pair.second) == mappedLUTs.end());
1329  slotMapping.LUTs[i][0][k] = pair.first;
1330  slotMapping.LUTs[i][1][k] = pair.second;
1331  mappedLUTs.insert(slotMapping.LUTs[i][0][k]);
1332  mappedLUTs.insert(slotMapping.LUTs[i][1][k]);
1333  mappedCells.insert(slotMapping.LUTs[i][0][k]);
1334  mappedCells.insert(slotMapping.LUTs[i][1][k]);
1335  break;
1336  }
1337  }
1338  }
1339  }
1340  }
1341 
// Remaining fully-free (i, k) positions each take one unmapped single LUT in the j == 0 slot.
1342  // mapped single LUTs
1343  for (int i = 0; i < 2; i++)
1344  {
1345  for (int k = 0; k < 4; k++)
1346  {
1347  if (!slotMapping.LUTs[i][0][k] && !slotMapping.LUTs[i][1][k])
1348  {
1349  for (auto tmpLUT : singleLUTs)
1350  {
1351  if (mappedLUTs.find(tmpLUT) == mappedLUTs.end())
1352  {
1353  assert(mappedLUTs.find(tmpLUT) == mappedLUTs.end());
1354  slotMapping.LUTs[i][0][k] = tmpLUT;
1355  mappedLUTs.insert(tmpLUT);
1356  mappedCells.insert(tmpLUT);
1357  break;
1358  }
1359  }
1360  }
1361  }
1362  }
1363 
1364  // assert(determinedClusterInSite->getSingleLUTs().size() + determinedClusterInSite->getPairedLUTs().size() * 2 ==
1365  // mappedLUTs.size());
1366 
1367  assert(singleLUTs.size() + pairedLUTs.size() <= 8);
// Sanity check: every FF held by the cluster's control sets must have received a slot; dump state otherwise.
1368  if (determinedClusterInSite)
1369  {
1370 
1371  unsigned int FFCnt = 0;
1372  for (auto &CSFF : determinedClusterInSite->getFFControlSets())
1373  FFCnt += CSFF.getFFs().size();
1374 
1375  if (FFCnt != mappedFFs.size())
1376  {
1377  std::cout << "FFCnt: " << FFCnt << " mappedFFs.size():" << mappedFFs.size() << "\n";
1378  determinedClusterInSite->printMyself();
1379  for (int i = 0; i < 2; i++)
1380  {
1381  for (int k = 0; k < 4; k++)
1382  {
1383  for (int j = 0; j < 2; j++)
1384  {
1385  std::cout << "i,k,j:" << i << "," << k << "," << j << ":\n";
1386  if (slotMapping.LUTs[i][j][k])
1387  std::cout << slotMapping.LUTs[i][j][k] << "\n";
1388  if (slotMapping.FFs[i][j][k])
1389  std::cout << slotMapping.FFs[i][j][k] << "\n";
1390  }
1391  }
1392  }
1393  assert(FFCnt == mappedFFs.size());
1394  }
1395  }
1396 
// Pass 1: within one (i0, k0) position, exchange the j == 0 and j == 1 LUTs when neither is a LUT6 and
// the summed checkDirectLUTFFConnect_slack score against the FFs in the same slots gets larger.
1397  for (int i0 = 0; i0 < 2; i0++)
1398  {
1399  for (int k0 = 0; k0 < 4; k0++)
1400  {
1401  if (!isLUT6(slotMapping.LUTs[i0][0][k0]) && !isLUT6(slotMapping.LUTs[i0][1][k0]))
1402  {
1403  float oriDirectInternalRoute_slack =
1404  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i0][0][k0], slotMapping.FFs[i0][0][k0]) +
1405  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i0][1][k0], slotMapping.FFs[i0][1][k0]);
1406 
1407  // switch locations
1408  float newDirectInternalRoute_slack =
1409  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i0][0][k0], slotMapping.FFs[i0][1][k0]) +
1410  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i0][1][k0], slotMapping.FFs[i0][0][k0]);
1411 
1412  if (oriDirectInternalRoute_slack < newDirectInternalRoute_slack)
1413  {
1414  DesignInfo::DesignCell *tmpLUT;
1415  tmpLUT = slotMapping.LUTs[i0][0][k0];
1416  slotMapping.LUTs[i0][0][k0] = slotMapping.LUTs[i0][1][k0];
1417  slotMapping.LUTs[i0][1][k0] = tmpLUT;
1418  assert(!isLUT6(tmpLUT));
1419  }
1420  }
1421  }
1422  }
1423 
// Pass 2 options. For option {a, b, c, d} the LUT pairs at (i0, k0) and (i1, k1) trade places:
//   new [i0][0][k0] = old [i1][a][k1],  new [i0][1][k0] = old [i1][b][k1],
//   new [i1][0][k1] = old [i0][c][k0],  new [i1][1][k1] = old [i0][d][k0].
1424  int LUTSwapOptions[4][4] = {{0, 1, 0, 1}, {1, 0, 0, 1}, {1, 0, 1, 0}, {0, 1, 1, 0}};
1425 
// Two sweeps over every ordered pair of distinct (i, k) positions; FFs stay where they are.
1426  for (int attempt = 0; attempt < 2; attempt++)
1427  {
1428  for (int i0 = 0; i0 < 2; i0++)
1429  {
1430  for (int k0 = 0; k0 < 4; k0++)
1431  {
1432  for (int i1 = 0; i1 < 2; i1++)
1433  {
1434  for (int k1 = 0; k1 < 4; k1++)
1435  {
1436  if (i0 == i1 && k0 == k1)
1437  {
1438  continue;
1439  }
1440  float oriDirectInternalRoute_slack =
1441  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i0][0][k0],
1442  slotMapping.FFs[i0][0][k0]) +
1443  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i0][1][k0],
1444  slotMapping.FFs[i0][1][k0]) +
1445  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i1][0][k1],
1446  slotMapping.FFs[i1][0][k1]) +
1447  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i1][1][k1],
1448  slotMapping.FFs[i1][1][k1]);
1449 
// optimalOption stays -1 when every option is rejected; the sentinel score then keeps the
// comparison below from applying it (assuming real scores are above -100000000).
1450  float optDirectInternalRoute_slack = -100000000;
1451  int optimalOption = -1;
1452  for (int optionId = 0; optionId < 4; optionId++)
1453  {
1454  // switch locations
1455  float newDirectInternalRoute_slack =
1456  checkDirectLUTFFConnect_slack(FF2LUT,
1457  slotMapping.LUTs[i1][LUTSwapOptions[optionId][0]][k1],
1458  slotMapping.FFs[i0][0][k0]) +
1459  checkDirectLUTFFConnect_slack(FF2LUT,
1460  slotMapping.LUTs[i1][LUTSwapOptions[optionId][1]][k1],
1461  slotMapping.FFs[i0][1][k0]) +
1462  checkDirectLUTFFConnect_slack(FF2LUT,
1463  slotMapping.LUTs[i0][LUTSwapOptions[optionId][2]][k0],
1464  slotMapping.FFs[i1][0][k1]) +
1465  checkDirectLUTFFConnect_slack(FF2LUT,
1466  slotMapping.LUTs[i0][LUTSwapOptions[optionId][3]][k0],
1467  slotMapping.FFs[i1][1][k1]);
1468  if (isLUT6(slotMapping.LUTs[i1][LUTSwapOptions[optionId][1]][k1]) ||
1469  isLUT6(slotMapping.LUTs[i0][LUTSwapOptions[optionId][3]]
1470  [k0])) // illegal to put LUT6 at LUT5 slot
1471  continue;
1472  if (newDirectInternalRoute_slack > optDirectInternalRoute_slack)
1473  {
1474  optimalOption = optionId;
1475  optDirectInternalRoute_slack = newDirectInternalRoute_slack;
1476  }
1477  }
1478 
1479  if (oriDirectInternalRoute_slack < optDirectInternalRoute_slack)
1480  {
// Read all four sources before writing any destination so the exchange does not clobber itself.
1481  DesignInfo::DesignCell *tmpLUT0 =
1482  slotMapping.LUTs[i1][LUTSwapOptions[optimalOption][0]][k1];
1483  DesignInfo::DesignCell *tmpLUT1 =
1484  slotMapping.LUTs[i1][LUTSwapOptions[optimalOption][1]][k1];
1485  DesignInfo::DesignCell *tmpLUT2 =
1486  slotMapping.LUTs[i0][LUTSwapOptions[optimalOption][2]][k0];
1487  DesignInfo::DesignCell *tmpLUT3 =
1488  slotMapping.LUTs[i0][LUTSwapOptions[optimalOption][3]][k0];
1489  slotMapping.LUTs[i0][0][k0] = tmpLUT0;
1490  assert(!isLUT6(tmpLUT1));
1491  slotMapping.LUTs[i0][1][k0] = tmpLUT1;
1492  slotMapping.LUTs[i1][0][k1] = tmpLUT2;
1493  assert(!isLUT6(tmpLUT3));
1494  slotMapping.LUTs[i1][1][k1] = tmpLUT3;
1495  continue;
1496  }
1497  }
1498  }
1499  }
1500  }
1501  }
1502 
// Pass 3: inside each (i, j) group, try all 24 orderings of the four FF slots (LUTs fixed) and apply
// the best one when it strictly improves the summed checkDirectLUTFFConnect_slack score.
1503  for (int i = 0; i < 2; i++)
1504  {
1505  for (int j = 0; j < 2; j++)
1506  {
1507  float oriDirectInternalRoute_slack =
1508  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][0], slotMapping.FFs[i][j][0]) +
1509  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][1], slotMapping.FFs[i][j][1]) +
1510  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][2], slotMapping.FFs[i][j][2]) +
1511  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][3], slotMapping.FFs[i][j][3]);
1512  float optDirectInternalRoute_slack = -100000000;
1513  int optimalOption = -1;
1514  for (int optionId = 0; optionId < 24; optionId++)
1515  {
1516  float newDirectInternalRoute_slack =
1517  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][0],
1518  slotMapping.FFs[i][j][FFSwapOption[optionId][0]]) +
1519  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][1],
1520  slotMapping.FFs[i][j][FFSwapOption[optionId][1]]) +
1521  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][2],
1522  slotMapping.FFs[i][j][FFSwapOption[optionId][2]]) +
1523  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][3],
1524  slotMapping.FFs[i][j][FFSwapOption[optionId][3]]);
1525  if (newDirectInternalRoute_slack > optDirectInternalRoute_slack)
1526  {
1527  optimalOption = optionId;
1528  optDirectInternalRoute_slack = newDirectInternalRoute_slack;
1529  }
1530  }
1531  if (oriDirectInternalRoute_slack < optDirectInternalRoute_slack)
1532  {
1533  DesignInfo::DesignCell *tmpFF0 = slotMapping.FFs[i][j][FFSwapOption[optimalOption][0]];
1534  DesignInfo::DesignCell *tmpFF1 = slotMapping.FFs[i][j][FFSwapOption[optimalOption][1]];
1535  DesignInfo::DesignCell *tmpFF2 = slotMapping.FFs[i][j][FFSwapOption[optimalOption][2]];
1536  DesignInfo::DesignCell *tmpFF3 = slotMapping.FFs[i][j][FFSwapOption[optimalOption][3]];
1537  slotMapping.FFs[i][j][0] = tmpFF0;
1538  slotMapping.FFs[i][j][1] = tmpFF1;
1539  slotMapping.FFs[i][j][2] = tmpFF2;
1540  slotMapping.FFs[i][j][3] = tmpFF3;
1541  continue;
1542  }
1543  }
1544  }
1545 
// Reverse index: cell -> {i, j, k} of the slot it currently occupies.
1546  std::map<DesignInfo::DesignCell *, std::array<int, 3>> cell2slot;
1547  for (int i = 0; i < 2; i++)
1548  {
1549  for (int j = 0; j < 2; j++)
1550  {
1551  for (int k = 0; k < 4; k++)
1552  {
1553  if (slotMapping.FFs[i][j][k])
1554  {
1555  cell2slot[slotMapping.FFs[i][j][k]] = std::array<int, 3>({i, j, k});
1556  }
1557  if (slotMapping.LUTs[i][j][k])
1558  {
1559  cell2slot[slotMapping.LUTs[i][j][k]] = std::array<int, 3>({i, j, k});
1560  }
1561  }
1562  }
1563  }
1564 
// If a LUT's partner FF sits elsewhere and the FF slot beside the LUT is empty, pull the FF over,
// provided the destination control set reports compatibility with the FF's current one.
// NOTE(review): cell2slot[targetFF] uses operator[], which default-inserts {0, 0, 0} if the FF was never
// given a slot - presumably every partner FF is mapped by this point; confirm.
1565  // Move FF to empty slot if possible to increase direct connect
1566  for (int i0 = 0; i0 < 2; i0++)
1567  {
1568  for (int j0 = 0; j0 < 2; j0++)
1569  {
1570  for (int k0 = 0; k0 < 4; k0++)
1571  {
1572  if (!slotMapping.LUTs[i0][j0][k0])
1573  continue;
1574  if (LUT2FF.find(slotMapping.LUTs[i0][j0][k0]) != LUT2FF.end())
1575  {
1576  auto targetFF = LUT2FF[slotMapping.LUTs[i0][j0][k0]];
1577  if (slotMapping.FFs[i0][j0][k0] == nullptr)
1578  {
1579  int halfCLBId0 = i0 * 2 + j0;
1580  auto &CSFF0 =
1581  determinedClusterInSite->getFFControlSets()[FFSwapOption[FFControlSetOrderId][halfCLBId0]];
1582  int i1 = cell2slot[targetFF].at(0);
1583  int j1 = cell2slot[targetFF].at(1);
1584  int k1 = cell2slot[targetFF].at(2);
1585  int halfCLBId1 = i1 * 2 + j1;
1586  auto &CSFF1 =
1587  determinedClusterInSite->getFFControlSets()[FFSwapOption[FFControlSetOrderId][halfCLBId1]];
1588  if (CSFF0.compatibleWith(CSFF1.getCSId()))
1589  {
// Keep the cluster's control-set bookkeeping in step with the slot move.
1590  if (FFSwapOption[FFControlSetOrderId][halfCLBId1] !=
1591  FFSwapOption[FFControlSetOrderId][halfCLBId0])
1592  determinedClusterInSite->moveFFFromCS1ToCS0(
1593  targetFF, FFSwapOption[FFControlSetOrderId][halfCLBId1],
1594  FFSwapOption[FFControlSetOrderId][halfCLBId0]);
1595  slotMapping.FFs[i0][j0][k0] = targetFF;
1596  slotMapping.FFs[i1][j1][k1] = nullptr;
1597  cell2slot[targetFF] = std::array<int, 3>({i0, j0, k0});
1598  }
1599  }
1600  }
1601  }
1602  }
1603  }
1604 
// For a LUT that has no sibling LUT at [1 - j0] and is still separated from its partner FF, search for
// a slot where both the LUT slot and the FF slot are free and relocate the LUT and FF together.
1605  // Move LUT-FF to empty slot if possible to increase direct connect
1606  for (int i0 = 0; i0 < 2; i0++)
1607  {
1608  for (int j0 = 0; j0 < 2; j0++)
1609  {
1610  for (int k0 = 0; k0 < 4; k0++)
1611  {
1612  if (!slotMapping.LUTs[i0][j0][k0])
1613  continue;
1614  auto targetLUT = slotMapping.LUTs[i0][j0][k0];
1615  if (slotMapping.LUTs[i0][1 - j0][k0])
1616  continue;
1617  if (LUT2FF.find(targetLUT) != LUT2FF.end())
1618  {
1619  auto targetFF = LUT2FF[targetLUT];
1620  int i1 = cell2slot[targetFF].at(0);
1621  int j1 = cell2slot[targetFF].at(1);
1622  int k1 = cell2slot[targetFF].at(2);
1623  int halfCLBId1 = i1 * 2 + j1;
1624 
1625  if (i0 != i1 || j0 != j1 || k0 != k1)
1626  {
1627  bool optimized = false;
1628  for (int ii = 0; ii < 2 && !optimized; ii++)
1629  {
1630  for (int jj = 0; jj < 2 && !optimized; jj++)
1631  {
1632  bool compatible = true;
// A LUT6 is never considered for a j == 1 destination.
1633  if (targetLUT->isLUT6() && jj == 1)
1634  continue;
1635  int halfCLBId0 = ii * 2 + jj;
// FFs already in the destination group must carry the same control-set id as targetFF.
1636  for (int kk = 0; kk < 4; kk++)
1637  {
1638  if (slotMapping.FFs[ii][jj][kk])
1639  {
1640  if (slotMapping.FFs[ii][jj][kk]->getControlSetInfo())
1641  {
1642  if (slotMapping.FFs[ii][jj][kk]->getControlSetInfo()->getId() !=
1643  targetFF->getControlSetInfo()->getId())
1644  {
1645  compatible = false;
1646  }
1647  }
1648  }
1649  }
// FFs in the sibling group [ii][1 - jj] only need to match on CLK and SR.
1650  for (int kk = 0; kk < 4; kk++)
1651  {
1652  if (slotMapping.FFs[ii][1 - jj][kk])
1653  {
1654  if (slotMapping.FFs[ii][1 - jj][kk]->getControlSetInfo())
1655  {
1656  if (slotMapping.FFs[ii][1 - jj][kk]->getControlSetInfo()->getCLK() !=
1657  targetFF->getControlSetInfo()->getCLK() ||
1658  slotMapping.FFs[ii][1 - jj][kk]->getControlSetInfo()->getSR() !=
1659  targetFF->getControlSetInfo()->getSR())
1660  {
1661  compatible = false;
1662  }
1663  }
1664  }
1665  }
1666 
1667  if (compatible)
1668  {
1669  for (int kk = 0; kk < 4; kk++)
1670  {
1671  if (!slotMapping.LUTs[ii][jj][kk] && !slotMapping.FFs[ii][jj][kk])
1672  {
1673  // if (slotMapping.LUTs[ii][1 - jj][kk])
1674  // continue;
// A LUT6 needs the sibling LUT slot empty; a smaller LUT may share with a non-LUT6
// sibling only when getPairPinNum reports at most 5 for the two.
1675  if (targetLUT->isLUT6())
1676  {
1677  if (slotMapping.LUTs[ii][1 - jj][kk])
1678  continue;
1679  }
1680  else
1681  {
1682  if (slotMapping.LUTs[ii][1 - jj][kk] &&
1683  slotMapping.LUTs[ii][1 - jj][kk]->isLUT6())
1684  continue;
1685  if (slotMapping.LUTs[ii][1 - jj][kk])
1686  {
1687  int pairPinNum =
1688  getPairPinNum(targetLUT, slotMapping.LUTs[ii][1 - jj][kk]);
1689  if (pairPinNum > 5)
1690  continue;
1691  }
1692  }
1693 
// Vacate both old slots first, then occupy the shared destination slot.
1694  slotMapping.LUTs[i0][j0][k0] = nullptr;
1695  slotMapping.FFs[i1][j1][k1] = nullptr;
1696  assert(!slotMapping.LUTs[ii][jj][kk]);
1697  assert(!slotMapping.FFs[ii][jj][kk]);
1698  slotMapping.LUTs[ii][jj][kk] = targetLUT;
1699  slotMapping.FFs[ii][jj][kk] = targetFF;
1700 
1701  if (FFSwapOption[FFControlSetOrderId][halfCLBId1] !=
1702  FFSwapOption[FFControlSetOrderId][halfCLBId0])
1703  determinedClusterInSite->moveFFFromCS1ToCS0(
1704  targetFF, FFSwapOption[FFControlSetOrderId][halfCLBId1],
1705  FFSwapOption[FFControlSetOrderId][halfCLBId0]);
1706  cell2slot[targetFF] = std::array<int, 3>({ii, jj, kk});
1707  cell2slot[targetLUT] = std::array<int, 3>({ii, jj, kk});
1708  optimized = true;
1709  break;
1710  }
1711  }
1712  }
1713  }
1714  }
1715  }
1716  }
1717  }
1718  }
1719  }
1720 
// Legality assertions on the final LUT layout: a LUT6 at j == 0 has no sibling at j == 1; otherwise
// the sibling is not a LUT6 and getPairPinNum for the two is at most 5.
1721  for (int i0 = 0; i0 < 2; i0++)
1722  {
1723  for (int k0 = 0; k0 < 4; k0++)
1724  {
1725  if (slotMapping.LUTs[i0][0][k0])
1726  {
1727  if (slotMapping.LUTs[i0][0][k0]->isLUT6())
1728  {
1729  assert(!slotMapping.LUTs[i0][1][k0]);
1730  }
1731  else if (slotMapping.LUTs[i0][1][k0])
1732  {
1733  assert(!slotMapping.LUTs[i0][1][k0]->isLUT6());
1734  int pairPinNum = getPairPinNum(slotMapping.LUTs[i0][1][k0], slotMapping.LUTs[i0][0][k0]);
1735  assert(pairPinNum <= 5);
1736  }
1737  }
1738  }
1739  }
1740 
// Score the candidate: for every slot where checkDirectLUTFFConnect accepts the LUT/FF occupants, add
// (latest input arrival - required arrival) / clockPeriod + 20 of the LUT's timing node.
// Nodes whose forward level is negative are skipped.
1741  auto &timingNodes = placementInfo->getTimingInfo()->getSimplePlacementTimingInfo();
// NOTE(review): listing line 1742 is absent from this extract; clockPeriod is presumably declared there.
1743  float directConnectCnt = 0;
1744  for (int i = 0; i < 2; i++)
1745  {
1746  for (int j = 0; j < 2; j++)
1747  {
1748  for (int k = 0; k < 4; k++)
1749  {
1750  if (checkDirectLUTFFConnect(FF2LUT, slotMapping.LUTs[i][j][k], slotMapping.FFs[i][j][k]))
1751  {
1752  auto srcCell = slotMapping.LUTs[i][j][k];
1753  unsigned int srcCellId = srcCell->getCellId();
1754  auto srcNode = timingNodes[srcCellId];
1755  // int succPathLen = srcNode->getLongestPathLength();
1756  if (srcNode->getForwardLevel() < 0)
1757  continue;
1758  float slack =
1759  (srcNode->getLatestInputArrival() - srcNode->getRequiredArrivalTime()) / clockPeriod + 20;
1760  directConnectCnt += slack;
1761  }
1762  }
1763  }
1764  }
// Keep this candidate only when it strictly beats the best score recorded so far.
1765  if (directConnectCnt > best_DirectConnect)
1766  {
1767  best_SlotMapping = slotMapping;
1768  best_mappedCells = mappedCells;
1769  best_mappedLUTs = mappedLUTs;
1770  best_mappedFFs = mappedFFs;
1771  best_DirectConnect = directConnectCnt;
1772  }
1773 }
1774 
// Places the cells of a MUXF8 macro into slotMapping at half index muxF8Offset (must be 0 or 1):
// the MUXF8 itself, its two MUXF7 slots, LUT slots [muxF8Offset][0][0..3] and FF slots
// [muxF8Offset][0][0..2]. Every macro cell is then recorded in mappedCells / mappedLUTs / mappedFFs.
// NOTE(review): the signature line (listing line 1775) is absent from this extract; muxF8Offset and
// MUXF8Macro are presumably its parameters - confirm against the header.
1776 {
1777  assert(muxF8Offset < 2);
// The macro's first cell is expected to be the MUXF8 (checked by original cell type).
1778  slotMapping.MuxF8[muxF8Offset] = MUXF8Macro->getCells()[0];
1779  assert(slotMapping.MuxF8[muxF8Offset]->getOriCellType() == DesignInfo::CellType_MUXF8);
1780  auto curMUXF8 = slotMapping.MuxF8[muxF8Offset];
// Split the macro's cells into virtual vs. non-virtual, each bucketed by mux / FF / LUT.
// NOTE(review): LUTsInMacro and MUXsInMacro are filled but never read in this block.
1781  std::vector<DesignInfo::DesignCell *> virtualLUTs;
1782  std::vector<DesignInfo::DesignCell *> virtualFFs;
1783  std::vector<DesignInfo::DesignCell *> virtualMUXs;
1784  std::vector<DesignInfo::DesignCell *> LUTsInMacro;
1785  std::vector<DesignInfo::DesignCell *> FFsInMacro;
1786  std::vector<DesignInfo::DesignCell *> MUXsInMacro;
1787  for (auto curCell : MUXF8Macro->getCells())
1788  {
1789  if (curCell->isVirtualCell())
1790  {
1791  if (curCell->isMux())
1792  virtualMUXs.push_back(curCell);
1793  if (curCell->isFF())
1794  virtualFFs.push_back(curCell);
1795  if (curCell->isLUT())
1796  virtualLUTs.push_back(curCell);
1797  }
1798  else
1799  {
1800  if (curCell->isMux())
1801  MUXsInMacro.push_back(curCell);
1802  if (curCell->isFF())
1803  FFsInMacro.push_back(curCell);
1804  if (curCell->isLUT())
1805  LUTsInMacro.push_back(curCell);
1806  }
1807  }
1808 
// Drivers of the MUXF8 inputs that belong to this macro become its MUXF7s:
// the I0 driver goes to MuxF7 index 1, the I1 driver to index 0. Undriven pins are skipped.
1809  // map MuxF7
1810  for (DesignInfo::DesignPin *pinBeDriven : curMUXF8->getInputPins())
1811  {
1812  if (!pinBeDriven->getDriverPin())
1813  continue;
1814  if (pinBeDriven->getRefPinName() == "I0")
1815  {
1816  auto I0MuxF7 = pinBeDriven->getDriverPin()->getCell();
1817  if (MUXF8Macro->hasCell(I0MuxF7))
1818  {
1819  assert(I0MuxF7->getCellType() == DesignInfo::CellType_MUXF7);
1820  slotMapping.MuxF7[muxF8Offset][1] = I0MuxF7; // I0 is for the upper one
1821  }
1822  }
1823  else if (pinBeDriven->getRefPinName() == "I1")
1824  {
1825  auto I1MuxF7 = pinBeDriven->getDriverPin()->getCell();
1826  if (MUXF8Macro->hasCell(I1MuxF7))
1827  {
1828  assert(I1MuxF7->getCellType() == DesignInfo::CellType_MUXF7);
1829  slotMapping.MuxF7[muxF8Offset][0] = I1MuxF7; // I1 is for the lower one
1830  }
1831  }
1832  }
1833 
// Any MUXF7 slot still empty takes the next virtual mux; all virtual muxes must be consumed.
1834  // fill route-thru MuxF7
1835  unsigned int fillVirtualCellCnt = 0;
1836  for (int i = 0; i < 2; i++)
1837  {
1838  if (!slotMapping.MuxF7[muxF8Offset][i])
1839  {
1840  assert(fillVirtualCellCnt < virtualMUXs.size());
1841  slotMapping.MuxF7[muxF8Offset][i] = virtualMUXs[fillVirtualCellCnt];
1842  fillVirtualCellCnt++;
1843  }
1844  }
1845  assert(fillVirtualCellCnt == virtualMUXs.size());
1846 
// For MUXF7 number i, its in-macro I0 driver (asserted LUT6) goes to LUT index i * 2 + 1 and its
// I1 driver to index i * 2, both in the j == 0 row.
1847  // fill LUTs into slots
1848  for (int i = 0; i < 2; i++)
1849  {
1850  auto curMuxF7 = slotMapping.MuxF7[muxF8Offset][i];
1851  for (DesignInfo::DesignPin *pinBeDriven : curMuxF7->getInputPins())
1852  {
1853  if (!pinBeDriven->getDriverPin())
1854  continue;
1855  if (pinBeDriven->getRefPinName() == "I0")
1856  {
1857  auto I0LUT = pinBeDriven->getDriverPin()->getCell();
1858  if (MUXF8Macro->hasCell(I0LUT))
1859  {
1860  assert(I0LUT->isLUT6());
1861  slotMapping.LUTs[muxF8Offset][0][i * 2 + 1] = I0LUT; // I0 is for the upper one
1862  }
1863  }
1864  else if (pinBeDriven->getRefPinName() == "I1")
1865  {
1866  auto I1LUT = pinBeDriven->getDriverPin()->getCell();
1867  if (MUXF8Macro->hasCell(I1LUT))
1868  {
1869  assert(I1LUT->isLUT6());
1870  slotMapping.LUTs[muxF8Offset][0][i * 2] = I1LUT; // I1 is for the lower one
1871  }
1872  }
1873  }
1874  }
1875 
// Remaining empty LUT slots in the j == 0 row take the virtual LUTs in order; all must be consumed.
1876  // fill route-thru LUTs
1877  fillVirtualCellCnt = 0;
1878  for (int i = 0; i < 4; i++)
1879  {
1880  if (!slotMapping.LUTs[muxF8Offset][0][i])
1881  {
1882  assert(fillVirtualCellCnt < virtualLUTs.size());
1883  slotMapping.LUTs[muxF8Offset][0][i] = virtualLUTs[fillVirtualCellCnt];
1884  fillVirtualCellCnt++;
1885  }
1886  }
1887  assert(fillVirtualCellCnt == virtualLUTs.size());
1888 
// FF slots 0..2 of the j == 0 row: either three virtual FFs, or two virtual FFs followed by the single
// non-virtual FF of the macro. Any other combination trips the assertion.
1889  // fill selection signal FFs
1890  if (3 == virtualFFs.size())
1891  {
1892  slotMapping.FFs[muxF8Offset][0][0] = virtualFFs[0];
1893  slotMapping.FFs[muxF8Offset][0][1] = virtualFFs[1];
1894  slotMapping.FFs[muxF8Offset][0][2] = virtualFFs[2];
1895  }
1896  else if (2 == virtualFFs.size())
1897  {
1898  assert(FFsInMacro.size() == 1);
1899  slotMapping.FFs[muxF8Offset][0][0] = virtualFFs[0];
1900  slotMapping.FFs[muxF8Offset][0][1] = virtualFFs[1];
1901  slotMapping.FFs[muxF8Offset][0][2] = FFsInMacro[0];
1902  }
1903  else
1904  {
1905  assert(false && "undefined situation");
1906  }
1907 
// Register every macro cell as mapped; LUTs and FFs are additionally tracked in their own sets.
1908  for (auto tmpCell : MUXF8Macro->getCells())
1909  {
1910  mappedCells.insert(tmpCell);
1911  if (tmpCell->isLUT())
1912  mappedLUTs.insert(tmpCell);
1913  else if (tmpCell->isFF())
1914  mappedFFs.insert(tmpCell);
1915  }
1916 }
1917 
// Places the cells of a MUXF7 macro into slotMapping at half index halfCLBOffset (must be 0 or 1).
// The macro takes the first free MUXF7 position (F7Offset 0, else 1) and the LUT/FF slots
// F7Offset * 2 and F7Offset * 2 + 1 of the j == 0 row; all macro cells are then recorded as mapped.
// NOTE(review): the signature line (listing line 1918) is absent from this extract; halfCLBOffset and
// MUXF7Macro are presumably its parameters - confirm against the header.
1919 {
1920  assert(halfCLBOffset < 2);
// Pick MUXF7 position 0 unless it is already taken, in which case try position 1.
1921  unsigned int F7Offset = 0;
1922  if (slotMapping.MuxF7[halfCLBOffset][F7Offset])
1923  F7Offset++;
// Both positions occupied: print the cluster for diagnosis before the assertion below fires.
1924  if (slotMapping.MuxF7[halfCLBOffset][F7Offset])
1925  {
1926  std::cout << determinedClusterInSite << "\n";
1927  std::cout.flush();
1928  }
1929  assert(!slotMapping.MuxF7[halfCLBOffset][F7Offset]);
// The macro's first cell is expected to be the MUXF7 (checked by original cell type).
1930  slotMapping.MuxF7[halfCLBOffset][F7Offset] = MUXF7Macro->getCells()[0];
1931  assert(slotMapping.MuxF7[halfCLBOffset][F7Offset]->getOriCellType() == DesignInfo::CellType_MUXF7);
1932  auto curMUXF7 = slotMapping.MuxF7[halfCLBOffset][F7Offset];
// Split the macro's cells into virtual vs. non-virtual, each bucketed by mux / FF / LUT.
// NOTE(review): virtualMUXs, MUXsInMacro and LUTsInMacro are filled but never read in this block.
1933  std::vector<DesignInfo::DesignCell *> virtualLUTs;
1934  std::vector<DesignInfo::DesignCell *> virtualFFs;
1935  std::vector<DesignInfo::DesignCell *> virtualMUXs;
1936  std::vector<DesignInfo::DesignCell *> LUTsInMacro;
1937  std::vector<DesignInfo::DesignCell *> FFsInMacro;
1938  std::vector<DesignInfo::DesignCell *> MUXsInMacro;
1939  for (auto curCell : MUXF7Macro->getCells())
1940  {
1941  if (curCell->isVirtualCell())
1942  {
1943  if (curCell->isMux())
1944  virtualMUXs.push_back(curCell);
1945  if (curCell->isFF())
1946  virtualFFs.push_back(curCell);
1947  if (curCell->isLUT())
1948  virtualLUTs.push_back(curCell);
1949  }
1950  else
1951  {
1952  if (curCell->isMux())
1953  MUXsInMacro.push_back(curCell);
1954  if (curCell->isFF())
1955  FFsInMacro.push_back(curCell);
1956  if (curCell->isLUT())
1957  LUTsInMacro.push_back(curCell);
1958  }
1959  }
1960 
// In-macro drivers of the MUXF7 inputs (asserted LUT6): the I0 driver goes to LUT index
// F7Offset * 2 + 1, the I1 driver to F7Offset * 2. Undriven pins are skipped.
1961  // fill LUTs into slots
1962  for (DesignInfo::DesignPin *pinBeDriven : curMUXF7->getInputPins())
1963  {
1964  if (!pinBeDriven->getDriverPin())
1965  {
1966  continue;
1967  }
1968 
1969  assert(pinBeDriven->getDriverPin());
1970  if (pinBeDriven->getRefPinName() == "I0")
1971  {
1972  auto I0LUT = pinBeDriven->getDriverPin()->getCell();
1973  if (MUXF7Macro->hasCell(I0LUT))
1974  {
1975  assert(I0LUT->isLUT6());
1976  slotMapping.LUTs[halfCLBOffset][0][F7Offset * 2 + 1] = I0LUT; // I0 is for the upper one
1977  }
1978  }
1979  else if (pinBeDriven->getRefPinName() == "I1")
1980  {
// This null check repeats the guard at the top of the loop body, so it cannot trigger here.
1981  if (!pinBeDriven->getDriverPin())
1982  {
1983  continue;
1984  }
1985 
1986  assert(pinBeDriven->getDriverPin());
1987  auto I1LUT = pinBeDriven->getDriverPin()->getCell();
1988  if (MUXF7Macro->hasCell(I1LUT))
1989  {
1990  assert(I1LUT->isLUT6());
1991  slotMapping.LUTs[halfCLBOffset][0][F7Offset * 2] = I1LUT; // I1 is for the lower one
1992  }
1993  }
1994  }
1995 
// Empty LUT slots inside this macro's two-slot window take the virtual LUTs in order (at most two);
// all virtual LUTs must be consumed.
1996  // fill route-thru LUTs
1997  unsigned int fillVirtualCellCnt = 0;
1998  assert(virtualLUTs.size() <= 2 && "There should be no more than two Virtual LUTs");
1999  for (unsigned int i = F7Offset * 2; i < F7Offset * 2 + 2; i++)
2000  {
2001  if (!slotMapping.LUTs[halfCLBOffset][0][i])
2002  {
2003  assert(fillVirtualCellCnt < virtualLUTs.size());
2004  slotMapping.LUTs[halfCLBOffset][0][i] = virtualLUTs[fillVirtualCellCnt];
2005  fillVirtualCellCnt++;
2006  }
2007  }
2008  assert(fillVirtualCellCnt == virtualLUTs.size());
2009 
// A single virtual FF, if present, goes to FF index F7Offset * 2; more than one is asserted against.
2010  // fill selection signal FFs
2011  if (1 == virtualFFs.size())
2012  {
2013  slotMapping.FFs[halfCLBOffset][0][F7Offset * 2] = virtualFFs[0];
2014  }
2015  else
2016  {
2017  assert(virtualFFs.size() <= 1 && "MUXF7 macro should only have one virtual FF.");
2018  }
2019 
// A single non-virtual FF, if present, goes to FF index F7Offset * 2 + 1; more than one is asserted against.
2020  if (1 == FFsInMacro.size())
2021  {
2022  assert(FFsInMacro.size() == 1);
2023  slotMapping.FFs[halfCLBOffset][0][F7Offset * 2 + 1] = FFsInMacro[0];
2024  }
2025  else
2026  {
2027  assert(FFsInMacro.size() <= 1 && "undefined situation");
2028  }
2029 
// Register every macro cell as mapped; LUTs and FFs are additionally tracked in their own sets.
2030  for (auto tmpCell : MUXF7Macro->getCells())
2031  {
2032  mappedCells.insert(tmpCell);
2033  if (tmpCell->isLUT())
2034  mappedLUTs.insert(tmpCell);
2035  else if (tmpCell->isFF())
2036  mappedFFs.insert(tmpCell);
2037  }
2038 }
2039 
// Returns the index (0..3) of the FF control set in determinedClusterInSite that holds an FF belonging
// to MUXF8Macro, or -1 when none of the four control sets contains such an FF.
// Asserts that all of the macro's FFs found this way sit in the same control set.
// NOTE(review): the signature line (listing line 2040) is absent from this extract; MUXF8Macro is
// presumably the parameter and the return type an int - confirm against the header.
2041 {
2042  assert(determinedClusterInSite);
2043  int resHalfCLB = -1;
2044  for (int i = 0; i < 4; i++)
2045  {
2046  for (auto tmpFF : determinedClusterInSite->getFFControlSets()[i].getFFs())
2047  {
2048  if (MUXF8Macro->hasCell(tmpFF))
2049  {
// First hit fixes the answer; later hits may only confirm it.
2050  if (resHalfCLB == -1)
2051  resHalfCLB = i;
2052  else
2053  assert(resHalfCLB == i);
2054  }
2055  }
2056  }
2057  return resHalfCLB;
2058 }
// Final slot mapping for a site that is a mux site and not a pre-packed site (both asserted).
// Tries all 24 control-set orderings with greedyMapMuxForCommonLUTFFInSite, evicts FFs from the mux
// half CLBs, tries the 24 orderings again, then adopts the best_* result as the current mapping and
// finishes with moveLUTToLUT6Slot().
// NOTE(review): the signature line (listing line 2059) is absent from this extract, so the function name
// and parameters cannot be confirmed here.
// NOTE(review): best_DirectConnect is not reset in this block; presumably it is initialised elsewhere
// so that the first candidate can win - confirm.
2060 {
2061  assert(!checkIsPrePackedSite() && checkIsMuxSite());
2062  assert(fixedPairedLUTs.size() == 0 && conflictLUTs.size() == 0);
2063  assert(determinedClusterInSite->getSingleLUTs().size() + determinedClusterInSite->getPairedLUTs().size() <= 8);
2064 
// First round: one candidate per control-set ordering.
2065  for (int i = 0; i < 24; i++)
2066  greedyMapMuxForCommonLUTFFInSite(i);
2067 
2068  determinedClusterInSite->evictFFsFromMuxHalfCLB();
2069 
// Second round after the eviction; each call is expected to update best_* only when it scores higher.
2070  for (int i = 0; i < 24; i++)
2071  greedyMapMuxForCommonLUTFFInSite(i);
2072 
// The working members hold the last candidate tried, so restore the best one.
2073  slotMapping = best_SlotMapping;
2074  mappedCells = best_mappedCells;
2075  mappedLUTs = best_mappedLUTs;
2076  mappedFFs = best_mappedFFs;
2077  moveLUTToLUT6Slot();
2078 }
2079 
2081 {
      // Build one complete LUT/FF slot mapping for this site under a given assignment of the cluster's
      // four FF control sets to the four half-CLBs, then keep it in best_* if its direct LUT->FF
      // connection score is higher than the best seen so far.
      //
      // FFControlSetOrderId selects a row of FFSwapOption (0..23). Entry [FFControlSetOrderId][h] is the
      // index of the control set used for half-CLB h.
      // slotMapping.LUTs / slotMapping.FFs are indexed [i][j][k] with i, j in {0,1} and k in 0..3;
      // the half-CLB id used throughout is i * 2 + j.
      //
      // NOTE(review): listing lines 2114, 2124, 2138, 2141 and 2686 are absent from this copy of the
      // source, so a few conditions/initializers below cannot be read here.

      // all 24 permutations of {0, 1, 2, 3}
2082  int FFSwapOption[24][4] = {{0, 1, 2, 3}, {0, 1, 3, 2}, {0, 2, 1, 3}, {0, 2, 3, 1}, {0, 3, 1, 2}, {0, 3, 2, 1},
2083  {1, 0, 2, 3}, {1, 0, 3, 2}, {1, 2, 0, 3}, {1, 2, 3, 0}, {1, 3, 0, 2}, {1, 3, 2, 0},
2084  {2, 0, 1, 3}, {2, 0, 3, 1}, {2, 1, 0, 3}, {2, 1, 3, 0}, {2, 3, 0, 1}, {2, 3, 1, 0},
2085  {3, 0, 1, 2}, {3, 0, 2, 1}, {3, 1, 0, 2}, {3, 1, 2, 0}, {3, 2, 0, 1}, {3, 2, 1, 0}};
2086 
      // start from an empty mapping for this attempt
2087  slotMapping = SiteBELMapping();
2088  mappedCells.clear();
2089  mappedLUTs.clear();
2090  mappedFFs.clear();
2091  std::map<DesignInfo::DesignCell *, DesignInfo::DesignCell *> LUT2FF;
2092  std::map<DesignInfo::DesignCell *, DesignInfo::DesignCell *> FF2LUT;
2093  auto singleLUTs = determinedClusterInSite->getSortedSingleLUTs();
2094  auto pairedLUTs = determinedClusterInSite->getSortedPairedLUTs();
2095 
      // Legality check of this ordering: return early (leaving best_* untouched) when it is rejected.
2096  for (int i = 0; i < 2; i++)
2097  {
2098  for (int j = 0; j < 2; j++)
2099  {
2100  int halfCLBId = i * 2 + j;
2101  auto &CSFF = determinedClusterInSite->getFFControlSets()[FFSwapOption[FFControlSetOrderId][halfCLBId]];
      // the other half-CLB that shares the same i
2102  int anotherHalfCLBId = i * 2 + (1 - j);
2103  auto &anotherCSFF =
2104  determinedClusterInSite->getFFControlSets()[FFSwapOption[FFControlSetOrderId][anotherHalfCLBId]];
2105  if (!compatibleInOneHalfCLB(FFSwapOption[FFControlSetOrderId][halfCLBId],
2106  FFSwapOption[FFControlSetOrderId][anotherHalfCLBId]))
2107  {
2108  return;
2109  }
      // only control sets that would be moved away from their own index are inspected
2110  if (halfCLBId != FFSwapOption[FFControlSetOrderId][halfCLBId])
2111  {
2112  for (auto FF : CSFF.getFFs())
2113  {
      // NOTE(review): the condition guarding this early return (listing line 2114) is missing here
2115  {
2116  return;
2117  }
2118  }
2119  }
2120  if (anotherHalfCLBId != FFSwapOption[FFControlSetOrderId][anotherHalfCLBId])
2121  {
2122  for (auto FF : anotherCSFF.getFFs())
2123  {
      // NOTE(review): the condition guarding this early return (listing line 2124) is missing here
2125  {
2126  return;
2127  }
2128  }
2129  }
2130  }
2131  }
2132 
      // Build the LUT<->FF lookup tables from two-cell macros (cell 0 is a LUT, cell 1 is an FF).
2133  for (auto &FFSet : determinedClusterInSite->getFFControlSets())
2134  {
2135  for (auto curFF : FFSet.getFFs())
2136  {
      // NOTE(review): the initializer of pairMacro (listing line 2138) is missing here
2137  PlacementInfo::PlacementMacro *pairMacro =
2139  if (pairMacro)
2140  {
      // NOTE(review): the condition opening this block (listing line 2141) is missing here
2142  {
2143  assert(pairMacro->getCells().size() == 2);
2144  assert(pairMacro->getCells()[0]->isLUT());
2145  assert(pairMacro->getCells()[1]->isFF());
2146  FF2LUT[pairMacro->getCells()[1]] = pairMacro->getCells()[0];
2147  LUT2FF[pairMacro->getCells()[0]] = pairMacro->getCells()[1];
2148  }
2149  }
2150  }
2151  }
2152 
      // Collect the MUXF7 / MUXF8 macros among the placement units of the cluster.
2153  std::vector<PlacementInfo::PlacementMacro *> MUXF7Macros;
2154  std::vector<PlacementInfo::PlacementMacro *> MUXF8Macros;
2155  for (auto tmpPU : determinedClusterInSite->getPUs())
2156  {
2157  if (auto tmpMacro = dynamic_cast<PlacementInfo::PlacementMacro *>(tmpPU))
2158  {
2159  if (tmpMacro->getMacroType() == PlacementInfo::PlacementMacro::PlacementMacroType_MUX7)
2160  {
2161  MUXF7Macros.push_back(tmpMacro);
2162  }
2163  else if (tmpMacro->getMacroType() == PlacementInfo::PlacementMacro::PlacementMacroType_MUX8)
2164  {
2165  MUXF8Macros.push_back(tmpMacro);
2166  }
2167  }
2168  }
      // capacity check: two MUXF7 macros count as one unit, each MUXF8 macro as one; at most 2 units
2169  assert((MUXF7Macros.size() + 1) / 2 + MUXF8Macros.size() <= 2);
2170 
      // Place the MUXF8 macros first. findMuxFromHalfCLB returns a control-set index, which must be
      // even; half of it is passed on as the first argument of mapMuxF8Macro.
2171  for (unsigned int i = 0; i < MUXF8Macros.size(); i++)
2172  {
2173  assert(MUXF8Macros[i]->getCells().size() == 10);
2174  assert(MUXF8Macros[i]->getCells()[0]->getCellType() == DesignInfo::CellType_MUXF8);
2175  int targetHalfCLB = findMuxFromHalfCLB(MUXF8Macros[i]);
2176  assert(targetHalfCLB % 2 == 0);
2177  mapMuxF8Macro(targetHalfCLB / 2, MUXF8Macros[i]);
2178  }
2179 
      // Then place the MUXF7 macros in the same way (4 or 5 cells each, MUXF7 cell first).
2180  for (unsigned int i = 0; i < MUXF7Macros.size(); i++)
2181  {
2182  assert(MUXF7Macros[i]->getCells().size() >= 4 && MUXF7Macros[i]->getCells().size() <= 5);
2183  assert(MUXF7Macros[i]->getCells()[0]->getCellType() == DesignInfo::CellType_MUXF7);
2184  int targetHalfCLB = findMuxFromHalfCLB(MUXF7Macros[i]);
2185  assert(targetHalfCLB % 2 == 0);
2186  mapMuxF7Macro(targetHalfCLB / 2, MUXF7Macros[i]);
2187  }
2188 
2189  // mapped FFs
      // Every FF slot still empty (visited with k from 3 down to 0) takes the first not-yet-mapped FF of
      // the control set assigned to its half-CLB.
2190  for (int i = 0; i < 2; i++)
2191  {
2192  for (int j = 0; j < 2; j++)
2193  {
2194  for (int k = 3; k >= 0; k--)
2195  {
2196  if (!slotMapping.FFs[i][j][k])
2197  {
2198  int halfCLBId = i * 2 + j;
2199  auto &CSFF =
2200  determinedClusterInSite->getFFControlSets()[FFSwapOption[FFControlSetOrderId][halfCLBId]];
2201 
2202  for (auto tmpFF : CSFF.getFFs())
2203  {
2204  if (mappedFFs.find(tmpFF) == mappedFFs.end())
2205  {
2206  mappedFFs.insert(tmpFF);
2207  mappedCells.insert(tmpFF);
2208  slotMapping.FFs[i][j][k] = tmpFF;
2209  break;
2210  }
2211  }
2212  }
2213  }
2214  }
2215  }
2216 
2217  assert(determinedClusterInSite->getSingleLUTs().size() + determinedClusterInSite->getPairedLUTs().size() <= 8);
2218  // map LUT connected to carry and their paired LUT
      // For an empty LUT slot whose sibling slot (same i and k, other j) is already occupied: if the
      // sibling LUT is a member of a pair in pairedLUTs, put its partner into the empty slot.
2219  for (int i = 0; i < 2; i++)
2220  {
2221  for (int j = 0; j < 2; j++)
2222  {
2223  for (int k = 0; k < 4; k++)
2224  {
2225  if (!slotMapping.LUTs[i][j][k])
2226  {
2227  if (slotMapping.LUTs[i][1 - j][k])
2228  {
2229  for (auto pair : pairedLUTs)
2230  {
2231  if (pair.first == slotMapping.LUTs[i][1 - j][k])
2232  {
2233  slotMapping.LUTs[i][1 - j][k] = pair.first;
2234  slotMapping.LUTs[i][j][k] = pair.second;
2235  mappedLUTs.insert(slotMapping.LUTs[i][j][k]);
2236  mappedLUTs.insert(slotMapping.LUTs[i][1 - j][k]);
2237  mappedCells.insert(slotMapping.LUTs[i][j][k]);
2238  mappedCells.insert(slotMapping.LUTs[i][1 - j][k]);
2239  break;
2240  }
2241  if (pair.second == slotMapping.LUTs[i][1 - j][k])
2242  {
2243  slotMapping.LUTs[i][1 - j][k] = pair.second;
2244  slotMapping.LUTs[i][j][k] = pair.first;
2245  mappedLUTs.insert(slotMapping.LUTs[i][j][k]);
2246  mappedLUTs.insert(slotMapping.LUTs[i][1 - j][k]);
2247  mappedCells.insert(slotMapping.LUTs[i][j][k]);
2248  mappedCells.insert(slotMapping.LUTs[i][1 - j][k]);
2249  break;
2250  }
2251  }
2252  }
2253  }
2254  }
2255  }
2256  }
2257 
2258  // mapped paired LUTs
      // Each (i, k) position with both LUT slots empty takes the first pair whose first LUT is still
      // unmapped: pair.first goes to j == 0 and pair.second to j == 1.
2259  for (int i = 0; i < 2; i++)
2260  {
2261  for (int k = 0; k < 4; k++)
2262  {
2263  if (!slotMapping.LUTs[i][0][k] && !slotMapping.LUTs[i][1][k])
2264  {
2265  for (auto pair : pairedLUTs)
2266  {
2267  if (mappedLUTs.find(pair.first) == mappedLUTs.end())
2268  {
      // Diagnostic dump printed right before the assert below fails (only one LUT of the pair
      // is mapped). The loop variables in this dump shadow the outer i and k.
2269  if (mappedLUTs.find(pair.second) != mappedLUTs.end())
2270  {
2271  for (int i = 0; i < 2; i++)
2272  {
2273  for (int j = 0; j < 2; j++)
2274  {
2275  for (int k = 0; k < 4; k++)
2276  {
2277  if (slotMapping.LUTs[i][j][k])
2278  {
2279  std::cout << "i,j,k:" << i << "," << j << "," << k << " "
2280  << slotMapping.LUTs[i][j][k] << "\n";
2281  }
2282  }
2283  }
2284  }
2285  determinedClusterInSite->printMyself();
2286  std::cout.flush();
2287  }
2288  assert(mappedLUTs.find(pair.second) == mappedLUTs.end());
2289  slotMapping.LUTs[i][0][k] = pair.first;
2290  slotMapping.LUTs[i][1][k] = pair.second;
2291  mappedLUTs.insert(slotMapping.LUTs[i][0][k]);
2292  mappedLUTs.insert(slotMapping.LUTs[i][1][k]);
2293  mappedCells.insert(slotMapping.LUTs[i][0][k]);
2294  mappedCells.insert(slotMapping.LUTs[i][1][k]);
2295  break;
2296  }
2297  }
2298  }
2299  }
2300  }
2301 
2302  // mapped single LUTs
      // Each (i, k) position that is still completely empty takes the first unmapped single LUT,
      // always in the j == 0 slot.
2303  for (int i = 0; i < 2; i++)
2304  {
2305  for (int k = 0; k < 4; k++)
2306  {
2307  if (!slotMapping.LUTs[i][0][k] && !slotMapping.LUTs[i][1][k])
2308  {
2309  for (auto tmpLUT : singleLUTs)
2310  {
2311  if (mappedLUTs.find(tmpLUT) == mappedLUTs.end())
2312  {
2313  assert(mappedLUTs.find(tmpLUT) == mappedLUTs.end());
2314  slotMapping.LUTs[i][0][k] = tmpLUT;
2315  mappedLUTs.insert(tmpLUT);
2316  mappedCells.insert(tmpLUT);
2317  break;
2318  }
2319  }
2320  }
2321  }
2322  }
2323 
2324  assert(determinedClusterInSite->getSingleLUTs().size() + determinedClusterInSite->getPairedLUTs().size() <= 8);
2325 
      // Local improvement 1: within one (i0, k0) position, swap the two LUTs between j == 0 and j == 1
      // when neither is a LUT6 and the swapped arrangement gives a larger summed
      // checkDirectLUTFFConnect_slack value against the FFs at the same position.
2326  for (int i0 = 0; i0 < 2; i0++)
2327  {
2328  for (int k0 = 0; k0 < 4; k0++)
2329  {
2330  if (!isLUT6(slotMapping.LUTs[i0][0][k0]) && !isLUT6(slotMapping.LUTs[i0][1][k0]))
2331  {
2332  float oriDirectInternalRoute_slack =
2333  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i0][0][k0], slotMapping.FFs[i0][0][k0]) +
2334  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i0][1][k0], slotMapping.FFs[i0][1][k0]);
2335 
2336  // switch locations
2337  float newDirectInternalRoute_slack =
2338  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i0][0][k0], slotMapping.FFs[i0][1][k0]) +
2339  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i0][1][k0], slotMapping.FFs[i0][0][k0]);
2340 
2341  if (oriDirectInternalRoute_slack < newDirectInternalRoute_slack)
2342  {
2343  DesignInfo::DesignCell *tmpLUT;
2344  tmpLUT = slotMapping.LUTs[i0][0][k0];
2345  slotMapping.LUTs[i0][0][k0] = slotMapping.LUTs[i0][1][k0];
2346  slotMapping.LUTs[i0][1][k0] = tmpLUT;
2347  assert(!isLUT6(tmpLUT));
2348  }
2349  }
2350  }
2351  }
2352 
      // Local improvement 2: exchange the LUT pairs of two different positions (i0, k0) and (i1, k1).
      // Each row {a, b, c, d} of LUTSwapOptions is read as:
      //   LUTs[i1][a][k1] -> LUTs[i0][0][k0],  LUTs[i1][b][k1] -> LUTs[i0][1][k0],
      //   LUTs[i0][c][k0] -> LUTs[i1][0][k1],  LUTs[i0][d][k0] -> LUTs[i1][1][k1].
2353  int LUTSwapOptions[4][4] = {{0, 1, 0, 1}, {1, 0, 0, 1}, {1, 0, 1, 0}, {0, 1, 1, 0}};
2354 
2355  for (int i0 = 0; i0 < 2; i0++)
2356  {
2357  for (int k0 = 0; k0 < 4; k0++)
2358  {
2359  for (int i1 = 0; i1 < 2; i1++)
2360  {
2361  for (int k1 = 0; k1 < 4; k1++)
2362  {
2363  if (i0 == i1 && k0 == k1)
2364  {
2365  continue;
2366  }
      // positions for which isMuxMacro() is true on any LUT or FF are never exchanged
2367  if ((isMuxMacro(slotMapping.LUTs[i0][0][k0]) || isMuxMacro(slotMapping.FFs[i0][0][k0]) ||
2368  isMuxMacro(slotMapping.LUTs[i0][1][k0]) || isMuxMacro(slotMapping.FFs[i0][1][k0]) ||
2369  isMuxMacro(slotMapping.LUTs[i1][0][k1]) || isMuxMacro(slotMapping.FFs[i1][0][k1]) ||
2370  isMuxMacro(slotMapping.LUTs[i1][1][k1]) || isMuxMacro(slotMapping.FFs[i1][1][k1])))
2371  continue;
2372  float oriDirectInternalRoute_slack =
2373  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i0][0][k0], slotMapping.FFs[i0][0][k0]) +
2374  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i0][1][k0], slotMapping.FFs[i0][1][k0]) +
2375  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i1][0][k1], slotMapping.FFs[i1][0][k1]) +
2376  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i1][1][k1], slotMapping.FFs[i1][1][k1]);
2377 
      // pick the legal option with the largest score; optimalOption stays -1 if all are skipped
2378  float optDirectInternalRoute_slack = -100000000;
2379  int optimalOption = -1;
2380  for (int optionId = 0; optionId < 4; optionId++)
2381  {
2382  // switch locations
2383  float newDirectInternalRoute_slack =
2384  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i1][LUTSwapOptions[optionId][0]][k1],
2385  slotMapping.FFs[i0][0][k0]) +
2386  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i1][LUTSwapOptions[optionId][1]][k1],
2387  slotMapping.FFs[i0][1][k0]) +
2388  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i0][LUTSwapOptions[optionId][2]][k0],
2389  slotMapping.FFs[i1][0][k1]) +
2390  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i0][LUTSwapOptions[optionId][3]][k0],
2391  slotMapping.FFs[i1][1][k1]);
2392  if (isLUT6(slotMapping.LUTs[i1][LUTSwapOptions[optionId][1]][k1]) ||
2393  isLUT6(slotMapping
2394  .LUTs[i0][LUTSwapOptions[optionId][3]][k0])) // illegal to put LUT6 at LUT5 slot
2395  continue;
2396  if (newDirectInternalRoute_slack > optDirectInternalRoute_slack)
2397  {
2398  optimalOption = optionId;
2399  optDirectInternalRoute_slack = newDirectInternalRoute_slack;
2400  }
2401  }
2402 
      // apply only a strict improvement; all four LUTs are read before any slot is overwritten
2403  if (oriDirectInternalRoute_slack < optDirectInternalRoute_slack)
2404  {
2405  DesignInfo::DesignCell *tmpLUT0 = slotMapping.LUTs[i1][LUTSwapOptions[optimalOption][0]][k1];
2406  DesignInfo::DesignCell *tmpLUT1 = slotMapping.LUTs[i1][LUTSwapOptions[optimalOption][1]][k1];
2407  DesignInfo::DesignCell *tmpLUT2 = slotMapping.LUTs[i0][LUTSwapOptions[optimalOption][2]][k0];
2408  DesignInfo::DesignCell *tmpLUT3 = slotMapping.LUTs[i0][LUTSwapOptions[optimalOption][3]][k0];
2409  slotMapping.LUTs[i0][0][k0] = tmpLUT0;
2410  assert(!isLUT6(tmpLUT1));
2411  slotMapping.LUTs[i0][1][k0] = tmpLUT1;
2412  slotMapping.LUTs[i1][0][k1] = tmpLUT2;
2413  assert(!isLUT6(tmpLUT3));
2414  slotMapping.LUTs[i1][1][k1] = tmpLUT3;
2415  continue;
2416  }
2417  }
2418  }
2419  }
2420  }
2421 
      // Local improvement 3: inside each half-CLB (i, j), try all 24 permutations of its four FF slots
      // (FFSwapOption is reused as the permutation table) and apply the best one when it strictly
      // improves the summed score.
2422  for (int i = 0; i < 2; i++)
2423  {
2424  for (int j = 0; j < 2; j++)
2425  {
2426  // if ((isMuxMacro(slotMapping.FFs[i][j][0]) || isMuxMacro(slotMapping.FFs[i][j][1]) ||
2427  // isMuxMacro(slotMapping.FFs[i][j][2]) || isMuxMacro(slotMapping.FFs[i][j][3])))
2428  // continue;
2429  float oriDirectInternalRoute_slack =
2430  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][0], slotMapping.FFs[i][j][0]) +
2431  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][1], slotMapping.FFs[i][j][1]) +
2432  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][2], slotMapping.FFs[i][j][2]) +
2433  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][3], slotMapping.FFs[i][j][3]);
2434  float optDirectInternalRoute_slack = -100000000;
2435  int optimalOption = -1;
2436  for (int optionId = 0; optionId < 24; optionId++)
2437  {
      // skip a permutation if a slot it changes holds an FF for which isMuxMacro() is true
2438  bool moveFFofMux = false;
2439  for (int checkFFId = 0; checkFFId < 4; checkFFId++)
2440  {
2441  if (FFSwapOption[optionId][checkFFId] != checkFFId)
2442  {
2443  if (isMuxMacro(slotMapping.FFs[i][j][checkFFId]))
2444  {
2445  moveFFofMux = true;
2446  }
2447  }
2448  }
2449  if (moveFFofMux)
2450  continue;
2451 
2452  float newDirectInternalRoute_slack =
2453  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][0],
2454  slotMapping.FFs[i][j][FFSwapOption[optionId][0]]) +
2455  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][1],
2456  slotMapping.FFs[i][j][FFSwapOption[optionId][1]]) +
2457  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][2],
2458  slotMapping.FFs[i][j][FFSwapOption[optionId][2]]) +
2459  checkDirectLUTFFConnect_slack(FF2LUT, slotMapping.LUTs[i][j][3],
2460  slotMapping.FFs[i][j][FFSwapOption[optionId][3]]);
2461  if (newDirectInternalRoute_slack > optDirectInternalRoute_slack)
2462  {
2463  optimalOption = optionId;
2464  optDirectInternalRoute_slack = newDirectInternalRoute_slack;
2465  }
2466  }
2467  if (oriDirectInternalRoute_slack < optDirectInternalRoute_slack)
2468  {
      // read all four FFs first, then write them back in permuted order
2469  DesignInfo::DesignCell *tmpFF0 = slotMapping.FFs[i][j][FFSwapOption[optimalOption][0]];
2470  DesignInfo::DesignCell *tmpFF1 = slotMapping.FFs[i][j][FFSwapOption[optimalOption][1]];
2471  DesignInfo::DesignCell *tmpFF2 = slotMapping.FFs[i][j][FFSwapOption[optimalOption][2]];
2472  DesignInfo::DesignCell *tmpFF3 = slotMapping.FFs[i][j][FFSwapOption[optimalOption][3]];
2473  slotMapping.FFs[i][j][0] = tmpFF0;
2474  slotMapping.FFs[i][j][1] = tmpFF1;
2475  slotMapping.FFs[i][j][2] = tmpFF2;
2476  slotMapping.FFs[i][j][3] = tmpFF3;
2477  continue;
2478  }
2479  }
2480  }
2481 
      // Sanity check: the number of FFs in the cluster's control sets must equal the number of mapped
      // FFs; on mismatch dump the cluster and the whole slot mapping, then assert.
2482  if (determinedClusterInSite)
2483  {
2484 
2485  unsigned int FFCnt = 0;
2486  for (auto &CSFF : determinedClusterInSite->getFFControlSets())
2487  FFCnt += CSFF.getFFs().size();
2488 
2489  if (FFCnt != mappedFFs.size())
2490  {
2491  std::cout << "FFCnt: " << FFCnt << " mappedFFs.size():" << mappedFFs.size() << "\n";
2492  determinedClusterInSite->printMyself();
2493  for (int i = 0; i < 2; i++)
2494  {
2495  for (int k = 0; k < 4; k++)
2496  {
2497  for (int j = 0; j < 2; j++)
2498  {
2499  std::cout << "i,k,j:" << i << "," << k << "," << j << ":\n";
2500  if (slotMapping.LUTs[i][j][k])
2501  std::cout << slotMapping.LUTs[i][j][k] << "\n";
2502  if (slotMapping.FFs[i][j][k])
2503  std::cout << slotMapping.FFs[i][j][k] << "\n";
2504  }
2505  }
2506  }
2507  assert(FFCnt == mappedFFs.size());
2508  }
2509  }
2510 
      // Reverse index: cell -> {i, j, k} of the slot it currently occupies (LUTs and FFs share the map).
2511  std::map<DesignInfo::DesignCell *, std::array<int, 3>> cell2slot;
2512  for (int i = 0; i < 2; i++)
2513  {
2514  for (int j = 0; j < 2; j++)
2515  {
2516  for (int k = 0; k < 4; k++)
2517  {
2518  if (slotMapping.FFs[i][j][k])
2519  {
2520  cell2slot[slotMapping.FFs[i][j][k]] = std::array<int, 3>({i, j, k});
2521  }
2522  if (slotMapping.LUTs[i][j][k])
2523  {
2524  cell2slot[slotMapping.LUTs[i][j][k]] = std::array<int, 3>({i, j, k});
2525  }
2526  }
2527  }
2528  }
2529  // Move FF to empty slot if possible to increase direct connect
      // For a LUT that has an entry in LUT2FF and whose FF slot at the same [i][j][k] is empty: move that
      // FF there when the control sets of the two half-CLBs are compatible (CSFF0.compatibleWith).
2530  for (int i0 = 0; i0 < 2; i0++)
2531  {
2532  for (int j0 = 0; j0 < 2; j0++)
2533  {
2534  for (int k0 = 0; k0 < 4; k0++)
2535  {
2536  if (!slotMapping.LUTs[i0][j0][k0])
2537  continue;
2538  if (LUT2FF.find(slotMapping.LUTs[i0][j0][k0]) != LUT2FF.end())
2539  {
2540  auto targetFF = LUT2FF[slotMapping.LUTs[i0][j0][k0]];
2541  if (slotMapping.FFs[i0][j0][k0] == nullptr)
2542  {
2543  int halfCLBId0 = i0 * 2 + j0;
2544  auto &CSFF0 =
2545  determinedClusterInSite->getFFControlSets()[FFSwapOption[FFControlSetOrderId][halfCLBId0]];
      // NOTE(review): std::map::operator[] default-inserts {0, 0, 0} when targetFF has no entry;
      // presumably every FF in LUT2FF has been placed by now - confirm.
2546  int i1 = cell2slot[targetFF].at(0);
2547  int j1 = cell2slot[targetFF].at(1);
2548  int k1 = cell2slot[targetFF].at(2);
2549  int halfCLBId1 = i1 * 2 + j1;
2550  auto &CSFF1 =
2551  determinedClusterInSite->getFFControlSets()[FFSwapOption[FFControlSetOrderId][halfCLBId1]];
2552  if (CSFF0.compatibleWith(CSFF1.getCSId()))
2553  {
      // keep the cluster's control-set membership in step with the slot move
2554  if (FFSwapOption[FFControlSetOrderId][halfCLBId1] !=
2555  FFSwapOption[FFControlSetOrderId][halfCLBId0])
2556  determinedClusterInSite->moveFFFromCS1ToCS0(
2557  targetFF, FFSwapOption[FFControlSetOrderId][halfCLBId1],
2558  FFSwapOption[FFControlSetOrderId][halfCLBId0]);
2559  slotMapping.FFs[i0][j0][k0] = targetFF;
2560  slotMapping.FFs[i1][j1][k1] = nullptr;
2561  cell2slot[targetFF] = std::array<int, 3>({i0, j0, k0});
2562  }
2563  }
2564  }
2565  }
2566  }
2567  }
2568 
2569  // Move LUT-FF to empty slot if possible to increase direct connect
      // Applies to a LUT whose sibling LUT slot (other j) is empty and whose FF from LUT2FF sits at a
      // different [i][j][k]: look for a position [ii][jj][kk] whose LUT and FF slots are both empty and
      // move the LUT and the FF there together.
2570  for (int i0 = 0; i0 < 2; i0++)
2571  {
2572  for (int j0 = 0; j0 < 2; j0++)
2573  {
2574  for (int k0 = 0; k0 < 4; k0++)
2575  {
2576  if (!slotMapping.LUTs[i0][j0][k0])
2577  continue;
2578  auto targetLUT = slotMapping.LUTs[i0][j0][k0];
2579  if (slotMapping.LUTs[i0][1 - j0][k0])
2580  continue;
2581  if (LUT2FF.find(targetLUT) != LUT2FF.end())
2582  {
2583  auto targetFF = LUT2FF[targetLUT];
2584  int i1 = cell2slot[targetFF].at(0);
2585  int j1 = cell2slot[targetFF].at(1);
2586  int k1 = cell2slot[targetFF].at(2);
2587  int halfCLBId1 = i1 * 2 + j1;
2588 
2589  if (i0 != i1 || j0 != j1 || k0 != k1)
2590  {
      // stop at the first successful relocation
2591  bool optimized = false;
2592  for (int ii = 0; ii < 2 && !optimized; ii++)
2593  {
2594  for (int jj = 0; jj < 2 && !optimized; jj++)
2595  {
2596  bool compatible = true;
      // a LUT6 is never moved into a j == 1 slot
2597  if (targetLUT->isLUT6() && jj == 1)
2598  continue;
2599  int halfCLBId0 = ii * 2 + jj;
      // FFs already in the target half-CLB that have control set info must use the same
      // control set id as targetFF
2600  for (int kk = 0; kk < 4; kk++)
2601  {
2602  if (slotMapping.FFs[ii][jj][kk])
2603  {
2604  if (slotMapping.FFs[ii][jj][kk]->getControlSetInfo())
2605  {
2606  if (slotMapping.FFs[ii][jj][kk]->getControlSetInfo()->getId() !=
2607  targetFF->getControlSetInfo()->getId())
2608  {
2609  compatible = false;
2610  }
2611  }
2612  }
2613  }
      // FFs in the other half-CLB with the same ii must share CLK and SR with targetFF
2614  for (int kk = 0; kk < 4; kk++)
2615  {
2616  if (slotMapping.FFs[ii][1 - jj][kk])
2617  {
2618  if (slotMapping.FFs[ii][1 - jj][kk]->getControlSetInfo())
2619  {
2620  if (slotMapping.FFs[ii][1 - jj][kk]->getControlSetInfo()->getCLK() !=
2621  targetFF->getControlSetInfo()->getCLK() ||
2622  slotMapping.FFs[ii][1 - jj][kk]->getControlSetInfo()->getSR() !=
2623  targetFF->getControlSetInfo()->getSR())
2624  {
2625  compatible = false;
2626  }
2627  }
2628  }
2629  }
2630 
2631  if (compatible)
2632  {
2633  for (int kk = 0; kk < 4; kk++)
2634  {
2635  if (!slotMapping.LUTs[ii][jj][kk] && !slotMapping.FFs[ii][jj][kk])
2636  {
2637  // if (slotMapping.LUTs[ii][1 - jj][kk])
2638  // continue;
      // a LUT6 needs the sibling LUT slot empty; any other LUT may sit next to a
      // non-LUT6 sibling only if getPairPinNum() of the two is at most 5
2639  if (targetLUT->isLUT6())
2640  {
2641  if (slotMapping.LUTs[ii][1 - jj][kk])
2642  continue;
2643  }
2644  else
2645  {
2646  if (slotMapping.LUTs[ii][1 - jj][kk] &&
2647  slotMapping.LUTs[ii][1 - jj][kk]->isLUT6())
2648  continue;
2649  if (slotMapping.LUTs[ii][1 - jj][kk])
2650  {
2651  int pairPinNum =
2652  getPairPinNum(targetLUT, slotMapping.LUTs[ii][1 - jj][kk]);
2653  if (pairPinNum > 5)
2654  continue;
2655  }
2656  }
2657 
      // vacate the old LUT and FF slots, then occupy the new position
2658  slotMapping.LUTs[i0][j0][k0] = nullptr;
2659  slotMapping.FFs[i1][j1][k1] = nullptr;
2660  assert(!slotMapping.LUTs[ii][jj][kk]);
2661  assert(!slotMapping.FFs[ii][jj][kk]);
2662  slotMapping.LUTs[ii][jj][kk] = targetLUT;
2663  slotMapping.FFs[ii][jj][kk] = targetFF;
2664 
2665  if (FFSwapOption[FFControlSetOrderId][halfCLBId1] !=
2666  FFSwapOption[FFControlSetOrderId][halfCLBId0])
2667  determinedClusterInSite->moveFFFromCS1ToCS0(
2668  targetFF, FFSwapOption[FFControlSetOrderId][halfCLBId1],
2669  FFSwapOption[FFControlSetOrderId][halfCLBId0]);
2670  cell2slot[targetFF] = std::array<int, 3>({ii, jj, kk});
2671  cell2slot[targetLUT] = std::array<int, 3>({ii, jj, kk});
2672  optimized = true;
2673  break;
2674  }
2675  }
2676  }
2677  }
2678  }
2679  }
2680  }
2681  }
2682  }
2683  }
2684 
      // Score the mapping: for each slot where checkDirectLUTFFConnect() holds, add
      // (latest input arrival - required arrival time) / clockPeriod + 20 of the LUT's timing node;
      // nodes with a negative forward level are skipped. Despite its name, directConnectCnt is this
      // weighted float sum, not a plain count.
      // NOTE(review): clockPeriod is not declared in the visible lines (listing line 2686 is missing).
2685  auto &timingNodes = placementInfo->getTimingInfo()->getSimplePlacementTimingInfo();
2687  float directConnectCnt = 0;
2688  for (int i = 0; i < 2; i++)
2689  {
2690  for (int j = 0; j < 2; j++)
2691  {
2692  for (int k = 0; k < 4; k++)
2693  {
2694  if (checkDirectLUTFFConnect(FF2LUT, slotMapping.LUTs[i][j][k], slotMapping.FFs[i][j][k]))
2695  {
2696  auto srcCell = slotMapping.LUTs[i][j][k];
2697  unsigned int srcCellId = srcCell->getCellId();
2698  auto srcNode = timingNodes[srcCellId];
2699  // int succPathLen = srcNode->getLongestPathLength();
2700  if (srcNode->getForwardLevel() < 0)
2701  continue;
2702  float slack =
2703  (srcNode->getLatestInputArrival() - srcNode->getRequiredArrivalTime()) / clockPeriod + 20;
2704  directConnectCnt += slack;
2705  }
2706  }
2707  }
2708  }
      // remember this mapping only if it strictly beats the best score recorded so far
2709  if (directConnectCnt > best_DirectConnect)
2710  {
2711  best_SlotMapping = slotMapping;
2712  best_mappedCells = mappedCells;
2713  best_mappedLUTs = mappedLUTs;
2714  best_mappedFFs = mappedFFs;
2715  best_DirectConnect = directConnectCnt;
2716  }
2717 }
2718 
2720 {
      // Driver for a site that is neither pre-packed nor a mux site (asserted below): evaluate all 24
      // control-set orderings with finalMapToSlotsForCommonLUTFFInSite(i), then adopt the mapping stored
      // in best_SlotMapping / best_mapped*.
      // NOTE(review): the per-ordering overload is outside this view; presumably it updates best_* the
      // same way the mux variant does - confirm.
2721  assert(!checkIsPrePackedSite() && !checkIsMuxSite());
2722  assert(fixedPairedLUTs.size() == 0 && conflictLUTs.size() == 0);
2723  assert(determinedClusterInSite->getSingleLUTs().size() + determinedClusterInSite->getPairedLUTs().size() <= 8);
2724 
2725  for (int i = 0; i < 24; i++)
2726  finalMapToSlotsForCommonLUTFFInSite(i);
2727 
      // adopt the best mapping and finish with the single-LUT slot adjustment
2728  slotMapping = best_SlotMapping;
2729  mappedCells = best_mappedCells;
2730  mappedLUTs = best_mappedLUTs;
2731  mappedFFs = best_mappedFFs;
2732  moveLUTToLUT6Slot();
2733 }
2734 
2736 {
      // For every (i, k) position: if the j == 0 LUT slot is empty while the j == 1 slot holds a LUT that
      // is one of the cluster's single LUTs, move that LUT to the j == 0 slot.
      // Does nothing when the site has no determined cluster.
2737  if (!determinedClusterInSite)
2738  return;
2739 
2740  for (int i = 0; i < 2; i++)
2741  {
2742  for (int k = 0; k < 4; k++)
2743  {
2744  if (!slotMapping.LUTs[i][0][k] && slotMapping.LUTs[i][1][k])
2745  {
      // only LUTs listed in getSingleLUTs() are moved
2746  if (determinedClusterInSite->getSingleLUTs().find(slotMapping.LUTs[i][1][k]) !=
2747  determinedClusterInSite->getSingleLUTs().end())
2748  {
2749  slotMapping.LUTs[i][0][k] = slotMapping.LUTs[i][1][k];
2750  slotMapping.LUTs[i][1][k] = nullptr;
2751  }
2752  }
2753  }
2754  }
2755 }
2756 
2758 {
      // Record the given macro as this site's LUTRAM macro and flag the site as a LUTRAM site.
      // No slot mapping is filled in here.
2759  LUTRAMMacro = _LUTRAMMacro;
2760  isLUTRAMSite = true;
2761 }
2762 
2764  float siteOffset)
2765 {
      // Mark this site as a carry site for the chain _CARRYChain at offset siteOffset and fill
      // slotMapping with the chain cells that belong to this site: the carry cell, the LUTs driving its
      // S[n] / DI[n] pins, the FFs driven by its O[n] / CO[n] pins, and virtual LUT/FF cells.
      // Afterwards fixedPairedLUTs, conflictLUTs and fixedLUTsInPairs are rebuilt from the LUT slots.
2766  isCarrySite = true;
2767  CARRYChain = _CARRYChain;
2768  CARRYChainSiteOffset = siteOffset;
2769 
2770  mappedCells.clear();
2771  mappedLUTs.clear();
2772  mappedFFs.clear();
2773 
      // a cell belongs to this site when its Y offset in the macro is within 1e-2 of siteOffset
2774  float siteHeight = 1.0;
2775  float lowerBound = siteHeight * CARRYChainSiteOffset - (1e-2);
2776  float uppwerBound = siteHeight * CARRYChainSiteOffset + (1e-2);
2777  for (auto curCell : CARRYChain->getCells())
2778  {
2779  float offsetY = CARRYChain->getCellOffsetYInMacro(curCell);
2780  if (offsetY > lowerBound && offsetY < uppwerBound)
2781  {
2782  if (curCell->isCarry())
2783  {
2784  carryCell = curCell;
2785  auto curCarry = curCell;
2786  slotMapping.Carry = curCarry;
2787  mappedCells.insert(curCarry);
      // Inputs: a LUT of the same chain driving S[n] goes to LUTs[n / 4][0][n % 4], one driving
      // DI[n] goes to LUTs[n / 4][1][n % 4]; n is the single character right after '['.
2788  for (DesignInfo::DesignPin *pinBeDriven : curCarry->getInputPins())
2789  {
2790  if (pinBeDriven->isUnconnected())
2791  continue;
2792  if (!pinBeDriven->getDriverPin()) // pin connected to GND/VCC, which has no specific driver pin
2793  continue;
2794 
2795  if (pinBeDriven->getRefPinName().find("S[") == 0)
2796  {
2797  if (placementInfo->getPlacementUnitByCell(pinBeDriven->getDriverPin()->getCell()) ==
2798  CARRYChain &&
2799  pinBeDriven->getDriverPin()->getCell()->isLUT())
2800  {
2801  char SPinCellId =
2802  pinBeDriven->getRefPinName()[pinBeDriven->getRefPinName().find("[") + 1] - '0';
2803  slotMapping.LUTs[SPinCellId / 4][0][SPinCellId % 4] =
2804  pinBeDriven->getDriverPin()->getCell();
2805  mappedCells.insert(pinBeDriven->getDriverPin()->getCell());
2806  mappedLUTs.insert(pinBeDriven->getDriverPin()->getCell());
2807  // char LUTCode = SPinCellId + 'A';
2808  // std::string LUTSiteName = std::string(1, LUTCode) + "6LUT";
2809  // outfile0 << " " << pinBeDriven->getDriverPin()->getCell()->getName() << " "
2810  // << CLBSite->getName() << "/" + LUTSiteName << "\n";
2811  }
2812  }
2813  else if (pinBeDriven->getRefPinName().find("DI[") == 0)
2814  {
2815  if (placementInfo->getPlacementUnitByCell(pinBeDriven->getDriverPin()->getCell()) ==
2816  CARRYChain &&
2817  pinBeDriven->getDriverPin()->getCell()->isLUT())
2818  {
2819  char DIPinCellId =
2820  pinBeDriven->getRefPinName()[pinBeDriven->getRefPinName().find("[") + 1] - '0';
2821  slotMapping.LUTs[DIPinCellId / 4][1][DIPinCellId % 4] =
2822  pinBeDriven->getDriverPin()->getCell();
2823  mappedCells.insert(pinBeDriven->getDriverPin()->getCell());
2824  mappedLUTs.insert(pinBeDriven->getDriverPin()->getCell());
2825  // char LUTCode = DIPinCellId + 'A';
2826  // std::string LUTSiteName = std::string(1, LUTCode) + "5LUT";
2827  // outfile0 << " " << pinBeDriven->getDriverPin()->getCell()->getName() << " "
2828  // << CLBSite->getName() << "/" + LUTSiteName << "\n";
2829  }
2830  }
2831  }
      // Outputs: only pins whose name starts with "O[" or "CO[" are considered.
2832  std::vector<std::string> checkFFRefPins{"O[", "CO["};
2833  for (DesignInfo::DesignPin *driverPin : curCarry->getOutputPins())
2834  {
2835  if (driverPin->isUnconnected())
2836  continue;
2837  DesignInfo::DesignNet *curOutputNet = driverPin->getNet();
2838  bool findMatchedInputPin = false;
2839  for (auto patternPin : checkFFRefPins)
2840  {
2841  if (driverPin->getRefPinName().find(patternPin) == 0)
2842  {
2843  findMatchedInputPin = true;
2844  break;
2845  }
2846  }
2847 
2848  if (findMatchedInputPin)
2849  {
      // FFcnt counts every driven pin that sits on an FF; theFF is set only for a pin whose
      // name contains "D" on an FF that belongs to this chain
2850  int FFcnt = 0;
2851  DesignInfo::DesignCell *theFF = nullptr;
2852  for (auto pinBeDriven : curOutputNet->getPinsBeDriven())
2853  {
2854  if (pinBeDriven->getCell()->isFF())
2855  {
2856  FFcnt++;
2857  if (pinBeDriven->getRefPinName().find("D") != std::string::npos)
2858  {
2859  if (placementInfo->getPlacementUnitByCell(pinBeDriven->getCell()) == CARRYChain)
2860  theFF = pinBeDriven->getCell();
2861  }
2862  }
2863  }
      // map the FF only when the net drives exactly one FF pin: CO[n] -> FFs[n / 4][1][n % 4],
      // O[n] -> FFs[n / 4][0][n % 4]
2864  if (FFcnt == 1 && theFF)
2865  {
2866  char FFPinCellId =
2867  driverPin->getRefPinName()[driverPin->getRefPinName().find("[") + 1] - '0';
2868  if (driverPin->getRefPinName().find("CO[") != std::string::npos)
2869  {
2870  assert(!slotMapping.FFs[FFPinCellId / 4][1][FFPinCellId % 4]);
2871  slotMapping.FFs[FFPinCellId / 4][1][FFPinCellId % 4] = theFF;
2872  mappedCells.insert(theFF);
2873  mappedFFs.insert(theFF);
2874  // std::string FFSiteName = std::string(1, FFCode) + "FF2";
2875  // outfile0 << " " << theFF->getName() << " " << CLBSite->getName() << "/" + FFSiteName
2876  // << "\n";
2877  }
2878  else if (driverPin->getRefPinName().find("O[") != std::string::npos)
2879  {
2880  assert(!slotMapping.FFs[FFPinCellId / 4][0][FFPinCellId % 4]);
2881  slotMapping.FFs[FFPinCellId / 4][0][FFPinCellId % 4] = theFF;
2882  mappedCells.insert(theFF);
2883  mappedFFs.insert(theFF);
2884  // std::string FFSiteName = std::string(1, FFCode) + "FF";
2885  // outfile0 << " " << theFF->getName() << " " << CLBSite->getName() << "/" + FFSiteName
2886  // << "\n";
2887  }
2888  }
2889  }
2890  }
2891  }
2892 
      // Virtual cells carry their slot in their name: the character right before '(' is the slot
      // id (0..7). For FFs the character before that is '2' (mapped to j == 1) or 'F' (j == 0).
2893  if (curCell->isVirtualCell())
2894  {
2895  assert(mappedCells.find(curCell) == mappedCells.end());
2896  if (curCell->isLUT())
2897  {
2898  assert(curCell->isLUT6());
2899  assert(curCell->getName().find('(') != std::string::npos);
2900  int slotId = curCell->getName()[curCell->getName().find('(') - 1] - '0';
2901  assert(slotId < 8 && slotId >= 0);
2902  assert(!slotMapping.LUTs[slotId / 4][0][slotId % 4]);
2903  mappedCells.insert(curCell);
2904  mappedLUTs.insert(curCell);
2905  slotMapping.LUTs[slotId / 4][0][slotId % 4] = curCell;
2906  }
2907  else
2908  {
2909  assert(curCell->isFF());
2910  assert(curCell->getName().find('(') != std::string::npos);
2911  int strOffset = curCell->getName().find('(');
2912  int slotId = curCell->getName()[strOffset - 1] - '0';
2913  assert(curCell->getName()[strOffset - 2] == '2' || curCell->getName()[strOffset - 2] == 'F');
2914  int oddCLB = curCell->getName()[strOffset - 2] == '2';
2915  assert(slotId < 8 && slotId >= 0);
2916  assert(!slotMapping.FFs[slotId / 4][oddCLB][slotId % 4]);
2917  mappedCells.insert(curCell);
2918  mappedFFs.insert(curCell);
2919  slotMapping.FFs[slotId / 4][oddCLB][slotId % 4] = curCell;
2920  }
2921  }
2922  }
2923  }
2924 
      // Rebuild the LUT bookkeeping from the slots filled above: a LUT alone at its (i, k) position
      // goes to conflictLUTs; two LUTs sharing a position form a fixed pair stored with the smaller
      // cell id first, and both are added to fixedLUTsInPairs.
2925  fixedPairedLUTs.clear();
2926  conflictLUTs.clear();
2927  fixedLUTsInPairs.clear();
2928 
2929  for (int i = 0; i < 2; i++)
2930  for (int k = 0; k < 4; k++)
2931  {
2932  if (slotMapping.LUTs[i][0][k] && !slotMapping.LUTs[i][1][k])
2933  {
2934  conflictLUTs.insert(slotMapping.LUTs[i][0][k]);
2935  }
2936  else if (slotMapping.LUTs[i][1][k] && !slotMapping.LUTs[i][0][k])
2937  {
2938  conflictLUTs.insert(slotMapping.LUTs[i][1][k]);
2939  }
2940  else if (slotMapping.LUTs[i][1][k] && slotMapping.LUTs[i][0][k])
2941  {
2942  if (slotMapping.LUTs[i][1][k]->getCellId() < slotMapping.LUTs[i][0][k]->getCellId())
2943  {
2944  fixedPairedLUTs.emplace(slotMapping.LUTs[i][1][k], slotMapping.LUTs[i][0][k]);
2945  fixedLUTsInPairs.insert(slotMapping.LUTs[i][1][k]);
2946  fixedLUTsInPairs.insert(slotMapping.LUTs[i][0][k]);
2947  }
2948  else
2949  {
2950  fixedPairedLUTs.emplace(slotMapping.LUTs[i][0][k], slotMapping.LUTs[i][1][k]);
2951  fixedLUTsInPairs.insert(slotMapping.LUTs[i][1][k]);
2952  fixedLUTsInPairs.insert(slotMapping.LUTs[i][0][k]);
2953  }
2954  }
2955  }
2956 }
PlacementInfo::getBinGrid
std::vector< std::vector< PlacementBinInfo * > > & getBinGrid(unsigned int BELTypeId)
Get the Bin Grid object.
Definition: PlacementInfo.h:3099
DesignInfo::CellType_LUT6_2
@ CellType_LUT6_2
Definition: DesignInfo.h:80
ParallelCLBPacker::numNeighbor
int numNeighbor
the threshold number of cells for site
Definition: ParallelCLBPacker.h:2984
ParallelCLBPacker::PackingCLBSite::removeClustersIncompatibleWithDetClusterFromPQ
void removeClustersIncompatibleWithDetClusterFromPQ()
remove clusters incompatible with determined cluster from PQ
Definition: ParallelCLBPacker_PackingCLBSite.cc:70
ParallelCLBPacker::PackingCLBSite::PackingCLBCluster::contains
bool contains(PlacementInfo::PlacementUnit *tmpPU)
check whether the cluster contains a specific PlacementUnit
Definition: ParallelCLBPacker.h:1027
PlacementInfo::PlacementBinInfo
BEL bin for global placement for a specific shared BEL type.
Definition: PlacementInfo.h:372
ParallelCLBPacker::PackingCLBSite::updateStep
void updateStep(bool initial, bool debug=false)
an iteration to pack PlacementUnit into a CLB site
Definition: ParallelCLBPacker_PackingCLBSite.cc:434
PlacementInfo::PlacementUnit::checkHasCARRY
bool checkHasCARRY()
Definition: PlacementInfo.h:1320
ParallelCLBPacker::PackingCLBSite::PackingCLBCluster::refreshId
void refreshId()
refresh the Id of the PackingCLBCluster so we can know it is changed.
Definition: ParallelCLBPacker.h:510
PlacementInfo::PlacementBinInfo::getCells
std::set< DesignInfo::DesignCell * > & getCells()
Get the reference of the set of cells in the bin.
Definition: PlacementInfo.h:563
PlacementInfo::PlacementMacro
a fixed group of multiple standard cells with constraints of their relative locations
Definition: PlacementInfo.h:1525
DesignInfo::DesignCell
a DesignCell in design netlist, DesignPin objects of which might connect to DesignNet objects
Definition: DesignInfo.h:782
PlacementInfo::checkClockColumnLegalization
bool checkClockColumnLegalization(PlacementInfo::PlacementUnit *curPU, DeviceInfo::DeviceSite *curSite)
check whether the given PlacementUnit can be mapped to the site considering the half-column clock leg...
Definition: PlacementInfo.h:4242
DesignInfo::DesignNet
a design net (hyperedge) defined in the design, connecting to pins of cells
Definition: DesignInfo.h:525
PlacementInfo::PlacementMacro::getCellOffsetYInMacro
float getCellOffsetYInMacro(DesignInfo::DesignCell *cell)
Definition: PlacementInfo.h:1762
PlacementInfo::getGridXY
void getGridXY(float cellX, float cellY, int &binIdX, int &binIdY)
Get the Grid row/column based on given location X,Y.
Definition: PlacementInfo.h:3344
ParallelCLBPacker::maxD
float maxD
the maximum constraint of the neighbor search diameter
Definition: ParallelCLBPacker.h:3002
ParallelCLBPacker::PackingCLBSite::finalMapToSlotsForCommonLUTFFInSite
void finalMapToSlotsForCommonLUTFFInSite()
finally map LUTs/FFs to the exact slots in the sites
Definition: ParallelCLBPacker_PackingCLBSite.cc:2719
ParallelCLBPacker.h
This header file contains the definitions of ParallelCLBPacker class and its internal modules and API...
PlacementInfo::PlacementMacro::PlacementMacroType_CARRY
@ PlacementMacroType_CARRY
Definition: PlacementInfo.h:1537
DesignInfo::DesignCellType
DesignCellType
design cell types
Definition: DesignInfo.h:73
ParallelCLBPacker::y2xRatio
float y2xRatio
Definition: ParallelCLBPacker.h:3041
DeviceInfo::getClockRegionByLocation
void getClockRegionByLocation(float locX, float locY, int &clockRegionX, int &clockRegionY)
Get the clock region ID (X/Y) by a given location (X/Y)
Definition: DeviceInfo.h:1183
ParallelCLBPacker::placementInfo
PlacementInfo * placementInfo
Definition: ParallelCLBPacker.h:2971
PlacementTimingInfo::TimingGraph::getClockPeriod
float getClockPeriod()
Get the clock period.
Definition: PlacementTimingInfo.h:777
PlacementInfo::PlacementMacro::PlacementMacroType_MUX8
@ PlacementMacroType_MUX8
Definition: PlacementInfo.h:1541
ParallelCLBPacker::PackingCLBSite::finalMapToSlotsForCarrySite
void finalMapToSlotsForCarrySite()
find the slots in the site for Carry by enumeration
Definition: ParallelCLBPacker_PackingCLBSite.cc:544
DesignInfo::DesignPin
A design pin on a design cell connected to a net.
Definition: DesignInfo.h:277
ParallelCLBPacker::PackingCLBSite::PackingCLBCluster::clusterHash
void clusterHash()
we use a hash function to encode the cluster to easily check duplicated clusters in the candidates
Definition: ParallelCLBPacker.h:1351
PlacementInfo::PlacementMacro::PlacementMacroType_MCLB
@ PlacementMacroType_MCLB
Definition: PlacementInfo.h:1535
ParallelCLBPacker::PQSize
int PQSize
the size of priority queue (the low-priority candidates will be removed)
Definition: ParallelCLBPacker.h:3008
ParallelCLBPacker::PackingCLBSite::moveLUTToLUT6Slot
void moveLUTToLUT6Slot()
Definition: ParallelCLBPacker_PackingCLBSite.cc:2735
PlacementInfo::getDeviceInfo
DeviceInfo * getDeviceInfo()
Definition: PlacementInfo.h:3308
ParallelCLBPacker::PackingCLBSite::SiteBELMapping
SiteBELMapping is a container recording the mapping between cells and BELs.
Definition: ParallelCLBPacker.h:2184
PlacementInfo::PlacementMacro::PlacementMacroType_MUX7
@ PlacementMacroType_MUX7
Definition: PlacementInfo.h:1540
DesignInfo::DesignCell::getOriCellType
DesignCellType getOriCellType()
Get the Original Cell Type object defined in the design netlist.
Definition: DesignInfo.h:1089
PlacementInfo::PlacementUnit::checkHasMUX
bool checkHasMUX()
Definition: PlacementInfo.h:1324
ParallelCLBPacker::deltaD
float deltaD
the increase step of the neighbor search diameter
Definition: ParallelCLBPacker.h:2990
ParallelCLBPacker::PackingCLBSite::findNewClustersWithNeighborPUs
void findNewClustersWithNeighborPUs()
extend the clusters in the priority queue with the neighbor PlacementUnits
Definition: ParallelCLBPacker_PackingCLBSite.cc:256
DesignInfo::DesignNet::getPinsBeDriven
std::vector< DesignPin * > & getPinsBeDriven()
Get the vector reference of the pins driven by the net.
Definition: DesignInfo.h:581
ParallelCLBPacker::PackingCLBSite::greedyMapMuxForCommonLUTFFInSite
void greedyMapMuxForCommonLUTFFInSite()
find the slots in the site for Muxes by enumeration
Definition: ParallelCLBPacker_PackingCLBSite.cc:2059
isLUT6
bool isLUT6(DesignInfo::DesignCell *cell)
Definition: ParallelCLBPacker_PackingCLBSite.cc:536
ParallelCLBPacker::unchangedIterationThr
int unchangedIterationThr
specify how many iterations a PlacementUnit should stay at the top priority of a site before we final...
Definition: ParallelCLBPacker.h:2978
PlacementInfo::PlacementMacro::PlacementMacroType_LUTFFPair
@ PlacementMacroType_LUTFFPair
Definition: PlacementInfo.h:1529
ParallelCLBPacker::clockRegionAware
bool clockRegionAware
whether to treat clock regions as constraints
Definition: ParallelCLBPacker.h:3047
ParallelCLBPacker::PackingCLBSite::PackingCLBCluster
PackingCLBCluster is a container of cells/PlacementUnits which can be packed in the corresponding CLB...
Definition: ParallelCLBPacker.h:453
ParallelCLBPacker::PackingCLBSite::PackingCLBCluster::addPU
bool addPU(PlacementInfo::PlacementUnit *tmpPU, bool allowOverlap=false)
try to add a given PlacementUnit into this cluster
Definition: ParallelCLBPacker_PackingCLBCluster.cc:621
ParallelCLBPacker::PackingCLBSite::PackingCLBCluster::getHash
int getHash()
Get the hash code for this cluster.
Definition: ParallelCLBPacker.h:1426
ParallelCLBPacker::PackingCLBSite::PackingCLBCluster::getPUs
const std::set< PlacementInfo::PlacementUnit *, Packing_PUcompare > & getPUs() const
Definition: ParallelCLBPacker.h:1032
PlacementInfo::PlacementBinInfo::getManhattanDistanceTo
float getManhattanDistanceTo(float inX, float inY)
Get the shortest Manhattan distance from the bin to a specific location.
Definition: PlacementInfo.h:413
PlacementInfo::getTimingInfo
PlacementTimingInfo * getTimingInfo()
Definition: PlacementInfo.h:3313
DesignInfo::CellType_MUXF8
@ CellType_MUXF8
Definition: DesignInfo.h:93
ParallelCLBPacker::PackingCLBSite::removeInvalidClustersFromPQ
void removeInvalidClustersFromPQ()
remove invalid clusters from the priority queue
Definition: ParallelCLBPacker_PackingCLBSite.cc:49
ParallelCLBPacker::PackingCLBSite::mapLUTRAMRelatedCellsToSlots
void mapLUTRAMRelatedCellsToSlots(PlacementInfo::PlacementMacro *_LUTRAMMacro)
Definition: ParallelCLBPacker_PackingCLBSite.cc:2757
ParallelCLBPacker::PackingCLBSite::findNeiborPUsFromBinGrid
std::set< PlacementInfo::PlacementUnit *, Packing_PUcompare > * findNeiborPUsFromBinGrid(DesignInfo::DesignCellType curCellType, float targetX, float targetY, float displacementLowerbound, float displacementUpperbound, int PUNumThreshold, const std::vector< PackingCLBSite * > &PUId2PackingCLBSite, float y2xRatio, std::set< PlacementInfo::PlacementUnit *, Packing_PUcompare > *res=nullptr, bool clockRegionAware=true)
find neighbor PlacementUnit around targetX/Y from the bin grid
Definition: ParallelCLBPacker_PackingCLBSite.cc:310
PlacementInfo::addPUIntoClockColumn
void addPUIntoClockColumn(PlacementInfo::PlacementUnit *curPU, DeviceInfo::DeviceSite *curSite)
map the given PlacementUnit to the site for later checking of the half-column clock legalization rule...
Definition: PlacementInfo.h:4301
PlacementInfo::PlacementMacro::hasCell
bool hasCell(DesignInfo::DesignCell *curCell)
Definition: PlacementInfo.h:1562
PlacementTimingInfo::getSimplePlacementTimingInfo
std::vector< TimingGraph< DesignInfo::DesignCell >::TimingNode * > & getSimplePlacementTimingInfo()
Get the Simple Timing Info object which regard design cells as timing nodes.
Definition: PlacementTimingInfo.h:835
ParallelCLBPacker::PackingCLBSite::PackingCLBCluster::incrementalUpdateScoreInSite
void incrementalUpdateScoreInSite(PlacementInfo::PlacementUnit *tmpPU)
incrementally update the score of this cluster by considering that only a given PlacementUnit will be...
Definition: ParallelCLBPacker_PackingCLBCluster.cc:568
DesignInfo::CellType_LUT6
@ CellType_LUT6
Definition: DesignInfo.h:79
ParallelCLBPacker::PackingCLBSite::mapMuxF7Macro
void mapMuxF7Macro(int halfCLBOffset, PlacementInfo::PlacementMacro *MUXF7Macro)
map cells in MUXF7 macro to CLB slot
Definition: ParallelCLBPacker_PackingCLBSite.cc:1918
DesignInfo::CellType_FDCE
@ CellType_FDCE
Definition: DesignInfo.h:82
ParallelCLBPacker::PackingCLBSite::mapCarryRelatedCellsToSlots
void mapCarryRelatedCellsToSlots(PlacementInfo::PlacementMacro *_CARRYChain, float siteOffset)
Definition: ParallelCLBPacker_PackingCLBSite.cc:2763
PlacementInfo::PlacementMacro::getCells
std::vector< DesignInfo::DesignCell * > & getCells()
Definition: PlacementInfo.h:1724
checkHalfColumn.i
int i
Definition: checkHalfColumn.py:5
PlacementInfo::getPlacementUnitByCell
PlacementUnit * getPlacementUnitByCell(DesignInfo::DesignCell *curCell)
Definition: PlacementInfo.h:3120
ParallelCLBPacker::curD
float curD
current neighbor search diameter
Definition: ParallelCLBPacker.h:2996
ParallelCLBPacker::PackingCLBSite::removeInvalidPUsFromNeighborPUs
void removeInvalidPUsFromNeighborPUs()
remove invalid clusters from neighbor PlacementUnits
Definition: ParallelCLBPacker_PackingCLBSite.cc:109
ParallelCLBPacker::PackingCLBSite::mapMuxF8Macro
void mapMuxF8Macro(int muxF8Offset, PlacementInfo::PlacementMacro *MUXF8Macro)
map cells in MUXF8 macro to CLB slot
Definition: ParallelCLBPacker_PackingCLBSite.cc:1775
ParallelCLBPacker::PackingCLBSite::findMuxFromHalfCLB
int findMuxFromHalfCLB(PlacementInfo::PlacementMacro *MUXF8Macro)
find the corresponding FF control set id for a given Mux macro (this mux macro should have been mappe...
Definition: ParallelCLBPacker_PackingCLBSite.cc:2040
ParallelCLBPacker::PackingCLBSite::refreshPrioryQueue
void refreshPrioryQueue()
sort the elements in the priority queue
Definition: ParallelCLBPacker_PackingCLBSite.cc:28
ParallelCLBPacker::PackingCLBSite::updateConsistentPUsInTop
void updateConsistentPUsInTop()
update the information of consistent PUs at the top of priority queue
Definition: ParallelCLBPacker_PackingCLBSite.cc:142
ParallelCLBPacker::PackingCLBSite::priorityQueue
std::vector< PackingCLBCluster * > priorityQueue
Definition: ParallelCLBPacker.h:2745
PlacementInfo::PlacementMacro::getMacroType
PlacementMacroType getMacroType()
Definition: PlacementInfo.h:1841
ParallelCLBPacker::PUId2PackingCLBSite
std::vector< PackingCLBSite * > PUId2PackingCLBSite
Definition: ParallelCLBPacker.h:3023
PlacementInfo::getPotentialBELTypeIDs
std::vector< int > & getPotentialBELTypeIDs(DesignInfo::DesignCell *cell)
Definition: PlacementInfo.h:2932
PlacementTimingInfo::getSimplePlacementTimingGraph
TimingGraph< DesignInfo::DesignCell > * getSimplePlacementTimingGraph()
Get the Simple Placement Timing Graph object.
Definition: PlacementTimingInfo.h:850
DesignInfo::CellType_MUXF7
@ CellType_MUXF7
Definition: DesignInfo.h:92