4040 const LO
myNumRows =
static_cast<LO
> (this->getNodeNumRows ());
4042#ifdef HAVE_TPETRA_DEBUG
4046 ! diag.getMap ()->isCompatible (
rowMap), std::runtime_error,
4047 "The input Vector's Map must be compatible with the CrsMatrix's row "
4048 "Map. You may check this by using Map's isCompatible method: "
4049 "diag.getMap ()->isCompatible (A.getRowMap ());");
4052 if (this->isFillComplete ()) {
4053 const auto D_lcl = diag.getLocalViewDevice(Access::OverwriteAll);
4056 Kokkos::subview (
D_lcl, Kokkos::make_pair (LO (0),
myNumRows), 0);
4060 using ::Tpetra::Details::getDiagCopyWithoutOffsets;
4063 getLocalMatrixDevice ());
4066 using ::Tpetra::Details::getLocalDiagCopyWithoutOffsetsNotFillComplete;
4067 (
void) getLocalDiagCopyWithoutOffsetsNotFillComplete (diag, *
this);
4071 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4076 Kokkos::MemoryUnmanaged>& offsets)
const
4080#ifdef HAVE_TPETRA_DEBUG
4086 ! diag.getMap ()->isCompatible (
rowMap), std::runtime_error,
4087 "The input Vector's Map must be compatible with (in the sense of Map::"
4088 "isCompatible) the CrsMatrix's row Map.");
4098 auto D_lcl = diag.getLocalViewDevice (Access::OverwriteAll);
4099 const LO
myNumRows =
static_cast<LO
> (this->getNodeNumRows ());
4102 Kokkos::subview (
D_lcl, Kokkos::make_pair (LO (0),
myNumRows), 0);
4104 KokkosSparse::getDiagCopy (
D_lcl_1d, offsets,
4105 getLocalMatrixDevice ());
4108 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4112 const Teuchos::ArrayView<const size_t>& offsets)
const
4115 using host_execution_space = Kokkos::DefaultHostExecutionSpace;
4118#ifdef HAVE_TPETRA_DEBUG
4124 ! diag.getMap ()->isCompatible (
rowMap), std::runtime_error,
4125 "The input Vector's Map must be compatible with (in the sense of Map::"
4126 "isCompatible) the CrsMatrix's row Map.");
4137 auto lclVecHost = diag.getLocalViewHost(Access::OverwriteAll);
4142 Kokkos::View<
const size_t*, Kokkos::HostSpace,
4143 Kokkos::MemoryTraits<Kokkos::Unmanaged> >;
4146 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4147 const LO
myNumRows =
static_cast<LO
> (this->getNodeNumRows ());
4148 const size_t INV = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
4152 Kokkos::parallel_for
4153 (
"Tpetra::CrsMatrix::getLocalDiagCopy",
4167 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4172 using ::Tpetra::Details::ProfilingRegion;
4173 using Teuchos::ArrayRCP;
4174 using Teuchos::ArrayView;
4175 using Teuchos::null;
4178 using Teuchos::rcpFromRef;
4182 ProfilingRegion
region (
"Tpetra::CrsMatrix::leftScale");
4185 if (this->getRangeMap ()->isSameAs (* (
x.getMap ()))) {
4188 auto exporter = this->getCrsGraphRef ().getExporter ();
4195 xp = rcpFromRef (
x);
4198 else if (this->getRowMap ()->isSameAs (* (
x.getMap ()))) {
4199 xp = rcpFromRef (
x);
4203 (
true, std::invalid_argument,
"x's Map must be the same as "
4204 "either the row Map or the range Map of the CrsMatrix.");
4207 if (this->isFillComplete()) {
4208 auto x_lcl =
xp->getLocalViewDevice (Access::ReadOnly);
4210 using ::Tpetra::Details::leftScaleLocalCrsMatrix;
4211 leftScaleLocalCrsMatrix (getLocalMatrixDevice (),
4217 (
true, std::runtime_error,
"CrsMatrix::leftScale requires matrix to be"
4222 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4227 using ::Tpetra::Details::ProfilingRegion;
4228 using Teuchos::ArrayRCP;
4229 using Teuchos::ArrayView;
4230 using Teuchos::null;
4233 using Teuchos::rcpFromRef;
4237 ProfilingRegion
region (
"Tpetra::CrsMatrix::rightScale");
4240 if (this->getDomainMap ()->isSameAs (* (
x.getMap ()))) {
4243 auto importer = this->getCrsGraphRef ().getImporter ();
4250 xp = rcpFromRef (
x);
4253 else if (this->getColMap ()->isSameAs (* (
x.getMap ()))) {
4254 xp = rcpFromRef (
x);
4257 (
true, std::runtime_error,
"x's Map must be the same as "
4258 "either the domain Map or the column Map of the CrsMatrix.");
4261 if (this->isFillComplete()) {
4262 auto x_lcl =
xp->getLocalViewDevice (Access::ReadOnly);
4264 using ::Tpetra::Details::rightScaleLocalCrsMatrix;
4265 rightScaleLocalCrsMatrix (getLocalMatrixDevice (),
4271 (
true, std::runtime_error,
"CrsMatrix::rightScale requires matrix to be"
4276 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4281 using Teuchos::ArrayView;
4282 using Teuchos::outArg;
4283 using Teuchos::REDUCE_SUM;
4284 using Teuchos::reduceAll;
4294 if (getNodeNumEntries() > 0) {
4295 if (isStorageOptimized ()) {
4298 const size_t numEntries = getNodeNumEntries ();
4299 auto values = valuesPacked_wdv.getHostView(Access::ReadOnly);
4300 for (
size_t k = 0;
k < numEntries; ++
k) {
4314 const size_t numEntries =
rowInfo.numEntries;
4316 for (
size_t k = 0;
k < numEntries; ++
k) {
4329 if (isFillComplete ()) {
4338 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4348 myGraph_.is_null (), std::runtime_error,
4349 "This method does not work if the matrix has a const graph. The whole "
4350 "idea of a const graph is that you are not allowed to change it, but "
4351 "this method necessarily must modify the graph, since the graph owns "
4352 "the matrix's column Map.");
4356 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4360 const Teuchos::RCP<const map_type>&
newColMap,
4361 const Teuchos::RCP<const import_type>&
newImport,
4366 graph ==
nullptr && myGraph_.is_null (), std::invalid_argument,
4367 "The input graph is null, but the matrix does not own its graph.");
4382 auto vals = this->getValuesViewHostNonConst (
rowInfo);
4392 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4399 myGraph_.is_null (), std::runtime_error,
4400 "This method does not work if the matrix has a const graph. The whole "
4401 "idea of a const graph is that you are not allowed to change it, but this"
4402 " method necessarily must modify the graph, since the graph owns the "
4403 "matrix's domain Map and Import objects.");
4407 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4413 const char tfecfFuncName[] =
"replaceDomainMapAndImporter: ";
4415 myGraph_.is_null (), std::runtime_error,
4416 "This method does not work if the matrix has a const graph. The whole "
4417 "idea of a const graph is that you are not allowed to change it, but this"
4418 " method necessarily must modify the graph, since the graph owns the "
4419 "matrix's domain Map and Import objects.");
4423 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4430 myGraph_.is_null (), std::runtime_error,
4431 "This method does not work if the matrix has a const graph. The whole "
4432 "idea of a const graph is that you are not allowed to change it, but this"
4433 " method necessarily must modify the graph, since the graph owns the "
4434 "matrix's domain Map and Import objects.");
4438 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4444 const char tfecfFuncName[] =
"replaceRangeMapAndExporter: ";
4446 myGraph_.is_null (), std::runtime_error,
4447 "This method does not work if the matrix has a const graph. The whole "
4448 "idea of a const graph is that you are not allowed to change it, but this"
4449 " method necessarily must modify the graph, since the graph owns the "
4450 "matrix's domain Map and Import objects.");
4454 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4458 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
4459 const Teuchos::ArrayView<const Scalar>&
values)
4461 using Teuchos::Array;
4480 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4487 using Teuchos::Comm;
4488 using Teuchos::outArg;
4491 using Teuchos::REDUCE_MAX;
4492 using Teuchos::REDUCE_MIN;
4493 using Teuchos::reduceAll;
4498 typedef typename Teuchos::Array<GO>::size_type size_type;
4502 const bool verbose = Behavior::verbose(
"CrsMatrix");
4503 std::unique_ptr<std::string>
prefix;
4505 prefix = this->createPrefix(
"CrsMatrix",
"globalAssemble");
4506 std::ostringstream
os;
4507 os << *
prefix <<
"nonlocals_.size()=" << nonlocals_.size()
4509 std::cerr <<
os.str();
4514 (! isFillActive (), std::runtime_error,
"Fill must be active before "
4515 "you may call this method.");
4548 for (
auto mapIter = nonlocals_.begin ();
mapIter != nonlocals_.end ();
4563 typename Teuchos::Array<Scalar>::iterator
vals_newEnd;
4592 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
4602 std::ostringstream
os;
4604 std::cerr <<
os.str();
4611 for (
auto mapIter = nonlocals_.begin ();
mapIter != nonlocals_.end ();
4633 int isLocallyComplete = 1;
4637 std::ostringstream
os;
4639 std::cerr <<
os.str();
4643 isLocallyComplete = 0;
4646 std::ostringstream
os;
4647 os << *
prefix <<
"doExport from nonlocalMatrix" <<
endl;
4648 std::cerr <<
os.str();
4655 std::ostringstream
os;
4656 os << *
prefix <<
"Original row Map is NOT 1-to-1" <<
endl;
4657 std::cerr <<
os.str();
4666 isLocallyComplete = 0;
4674 std::ostringstream
os;
4675 os << *
prefix <<
"Create & doExport into 1-to-1 matrix"
4677 std::cerr <<
os.str();
4687 std::ostringstream
os;
4689 std::cerr <<
os.str();
4695 std::ostringstream
os;
4696 os << *
prefix <<
"doImport from 1-to-1 matrix" <<
endl;
4697 std::cerr <<
os.str();
4708 std::ostringstream
os;
4710 std::cerr <<
os.str();
4727 "you called insertGlobalValues with a global row index which is not in "
4728 "the matrix's row Map on any process in its communicator.");
4731 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4736 if (! isStaticGraph ()) {
4737 myGraph_->resumeFill (
params);
4739 clearGlobalConstants ();
4740 fillComplete_ =
false;
4743 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4757 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4761 return getCrsGraphRef ().haveGlobalConstants ();
4764 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4776 frobNorm_ = -STM::one ();
4779 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4787 (this->getCrsGraph ().
is_null (), std::logic_error,
4788 "getCrsGraph() returns null. This should not happen at this point. "
4789 "Please report this bug to the Tpetra developers.");
4792 if (this->isStaticGraph () &&
graph.isFillComplete ()) {
4805 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4808 fillComplete (
const Teuchos::RCP<const map_type>& domainMap,
4809 const Teuchos::RCP<const map_type>&
rangeMap,
4810 const Teuchos::RCP<Teuchos::ParameterList>&
params)
4814 using Teuchos::ArrayRCP;
4820 (
"Tpetra::CrsMatrix::fillComplete");
4821 const bool verbose = Behavior::verbose(
"CrsMatrix");
4822 std::unique_ptr<std::string>
prefix;
4824 prefix = this->createPrefix(
"CrsMatrix",
"fillComplete(dom,ran,p)");
4825 std::ostringstream
os;
4827 std::cerr <<
os.str ();
4830 "Tpetra::CrsMatrix::fillCompete",
4834 (! this->isFillActive () || this->isFillComplete (), std::runtime_error,
4835 "Matrix fill state must be active (isFillActive() "
4836 "must be true) before you may call fillComplete().");
4837 const int numProcs = this->getComm ()->getSize ();
4852 if (!
params.is_null ()) {
4855 if (
params->isParameter (
"sort column map ghost gids")) {
4858 else if (
params->isParameter (
"Sort column Map ghost GIDs")) {
4866 if (! this->myGraph_.is_null ()) {
4867 this->myGraph_->sortGhostsAssociatedWithEachProcessor_ =
sortGhosts;
4870 if (! this->getCrsGraphRef ().indicesAreAllocated ()) {
4871 if (this->hasColMap ()) {
4872 allocateValues(LocalIndices, GraphNotYetAllocated, verbose);
4875 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
4881 this->globalAssemble ();
4885 (
numProcs == 1 && nonlocals_.size() > 0,
4886 std::runtime_error,
"Cannot have nonlocal entries on a serial run. "
4887 "An invalid entry (i.e., with row index not in the row Map) must have "
4888 "been submitted to the CrsMatrix.");
4891 if (this->isStaticGraph ()) {
4899#ifdef HAVE_TPETRA_DEBUG
4918 this->staticGraph_->getDomainMap ()->isSameAs (*
domainMap);
4920 this->staticGraph_->getRangeMap ()->isSameAs (*
rangeMap);
4924 "The CrsMatrix's domain Map does not match the graph's domain Map. "
4925 "The graph cannot be changed because it was given to the CrsMatrix "
4926 "constructor as const. You can fix this by passing in the graph's "
4927 "domain Map and range Map to the matrix's fillComplete call.");
4931 "The CrsMatrix's range Map does not match the graph's range Map. "
4932 "The graph cannot be changed because it was given to the CrsMatrix "
4933 "constructor as const. You can fix this by passing in the graph's "
4934 "domain Map and range Map to the matrix's fillComplete call.");
4939 this->fillLocalMatrix (
params);
4950 Teuchos::Array<int> remotePIDs (0);
4953 this->myGraph_->makeColMap (remotePIDs);
4959 this->myGraph_->makeIndicesLocal(verbose);
4968 const bool sorted = this->myGraph_->isSorted ();
4969 const bool merged = this->myGraph_->isMerged ();
4979 this->fillLocalGraphAndMatrix (
params);
4982 params->get (
"compute global constants",
true);
4984 this->myGraph_->computeGlobalConstants ();
4987 this->myGraph_->computeLocalConstants ();
4989 this->myGraph_->fillComplete_ =
true;
4990 this->myGraph_->checkInternalState ();
4995 "Tpetra::CrsMatrix::fillCompete",
"callComputeGlobalConstamnts"
4998 params->get (
"compute global constants",
true);
5000 this->computeGlobalConstants ();
5006 this->fillComplete_ =
true;
5009 "Tpetra::CrsMatrix::fillCompete",
"checkInternalState"
5011 this->checkInternalState ();
5015 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5019 const Teuchos::RCP<const map_type> &
rangeMap,
5020 const Teuchos::RCP<const import_type>&
importer,
5021 const Teuchos::RCP<const export_type>&
exporter,
5022 const Teuchos::RCP<Teuchos::ParameterList> &
params)
5024#ifdef HAVE_TPETRA_MMM_TIMINGS
5028 std::string
prefix = std::string(
"Tpetra ")+
label + std::string(
": ");
5029 using Teuchos::TimeMonitor;
5031 Teuchos::TimeMonitor
all(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-all")));
5036 std::runtime_error,
"Matrix fill state must be active (isFillActive() "
5037 "must be true) before calling fillComplete().");
5039 myGraph_.is_null (), std::logic_error,
"myGraph_ is null. This is not allowed.");
5042#ifdef HAVE_TPETRA_MMM_TIMINGS
5043 Teuchos::TimeMonitor
graph(*TimeMonitor::getNewTimer(
prefix + std::string(
"eSFC-M-Graph")));
5050 params->get (
"compute global constants",
true);
5052 this->computeGlobalConstants ();
5056#ifdef HAVE_TPETRA_MMM_TIMINGS
5060 fillLocalGraphAndMatrix (
params);
5065 fillComplete_ =
true;
5068#ifdef HAVE_TPETRA_DEBUG
5070 ": We're at the end of fillComplete(), but isFillActive() is true. "
5071 "Please report this bug to the Tpetra developers.");
5073 ": We're at the end of fillComplete(), but isFillActive() is true. "
5074 "Please report this bug to the Tpetra developers.");
5077#ifdef HAVE_TPETRA_MMM_TIMINGS
5078 Teuchos::TimeMonitor
cIS(*TimeMonitor::getNewTimer(
prefix + std::string(
"ESFC-M-cIS")));
5081 checkInternalState();
5085 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5120 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5125 using ::Tpetra::Details::ProfilingRegion;
5127 typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
5128 host_execution_space;
5129 typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
5130 const char tfecfFuncName[] =
"sortAndMergeIndicesAndValues: ";
5131 ProfilingRegion
regionSAM (
"Tpetra::CrsMatrix::sortAndMergeIndicesAndValues");
5135 (this->isStaticGraph (), std::runtime_error,
"Cannot sort or merge with "
5136 "\"static\" (const) graph, since the matrix does not own the graph.");
5138 (this->myGraph_.is_null (), std::logic_error,
"myGraph_ is null, but "
5139 "this matrix claims ! isStaticGraph(). "
5140 "Please report this bug to the Tpetra developers.");
5142 (this->isStorageOptimized (), std::logic_error,
"It is invalid to call "
5143 "this method if the graph's storage has already been optimized. "
5144 "Please report this bug to the Tpetra developers.");
5147 const LO
lclNumRows =
static_cast<LO
> (this->getNodeNumRows ());
5153 auto vals_ = this->valuesUnpacked_wdv.getHostView(Access::ReadWrite);
5154 auto cols_ =
graph.lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
5155 Kokkos::parallel_reduce (
"sortAndMergeIndicesAndValues", range_type (0,
lclNumRows),
5172 graph.indicesAreSorted_ =
true;
5175 graph.noRedundancies_ =
true;
5180 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5191 using Teuchos::rcp_const_cast;
5192 using Teuchos::rcpFromRef;
5193 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5194 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
5228 (!
Y_in.isDistributed () && this->getComm ()->getSize () != 1);
5245 if (!
X_in.isConstantStride ()) {
5263 ProfilingRegion
regionImport (
"Tpetra::CrsMatrix::apply: Import");
5290 ProfilingRegion
regionExport (
"Tpetra::CrsMatrix::apply: Export");
5342 ProfilingRegion
regionReduce (
"Tpetra::CrsMatrix::apply: Reduce Y");
5347 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5352 const Teuchos::ETransp
mode,
5357 using Teuchos::null;
5360 using Teuchos::rcp_const_cast;
5361 using Teuchos::rcpFromRef;
5362 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5401 if (!
X_in.isConstantStride () &&
importer.is_null ()) {
5404 X = rcpFromRef (
X_in);
5409 if (importMV_ != Teuchos::null && importMV_->getNumVectors() !=
numVectors) {
5412 if (importMV_ ==
null) {
5417 if (exportMV_ != Teuchos::null && exportMV_->getNumVectors() !=
numVectors) {
5420 if (exportMV_ ==
null) {
5428 ProfilingRegion
regionImport (
"Tpetra::CrsMatrix::apply (transpose): Import");
5437 ProfilingRegion
regionExport (
"Tpetra::CrsMatrix::apply (transpose): Export");
5444 importMV_->putScalar (
ZERO);
5463 if (!
Y_in.isConstantStride () ||
X.getRawPtr () == &
Y_in) {
5477 ProfilingRegion
regionReduce (
"Tpetra::CrsMatrix::apply (transpose): Reduce Y");
5482 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5487 const Teuchos::ETransp
mode,
5492 using Teuchos::NO_TRANS;
5495 auto X_lcl =
X.getLocalViewDevice(Access::ReadOnly);
5496 auto Y_lcl =
Y.getLocalViewDevice(Access::ReadWrite);
5497 auto matrix_lcl = getLocalMultiplyOperator();
5503 (
X.getNumVectors () !=
Y.getNumVectors (), std::runtime_error,
5504 "X.getNumVectors() = " <<
X.getNumVectors () <<
" != "
5505 "Y.getNumVectors() = " <<
Y.getNumVectors () <<
".");
5509 getColMap ()->getNodeNumElements (), std::runtime_error,
5510 "NO_TRANS case: X has the wrong number of local rows. "
5511 "X.getLocalLength() = " <<
X.getLocalLength () <<
" != "
5512 "getColMap()->getNodeNumElements() = " <<
5513 getColMap ()->getNodeNumElements () <<
".");
5516 getRowMap ()->getNodeNumElements (), std::runtime_error,
5517 "NO_TRANS case: Y has the wrong number of local rows. "
5518 "Y.getLocalLength() = " <<
Y.getLocalLength () <<
" != "
5519 "getRowMap()->getNodeNumElements() = " <<
5520 getRowMap ()->getNodeNumElements () <<
".");
5523 getRowMap ()->getNodeNumElements (), std::runtime_error,
5524 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5525 "rows. X.getLocalLength() = " <<
X.getLocalLength ()
5526 <<
" != getRowMap()->getNodeNumElements() = "
5527 << getRowMap ()->getNodeNumElements () <<
".");
5530 getColMap ()->getNodeNumElements (), std::runtime_error,
5531 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5532 "rows. Y.getLocalLength() = " <<
Y.getLocalLength ()
5533 <<
" != getColMap()->getNodeNumElements() = "
5534 << getColMap ()->getNodeNumElements () <<
".");
5536 (! isFillComplete (), std::runtime_error,
"The matrix is not "
5537 "fill complete. You must call fillComplete() (possibly with "
5538 "domain and range Map arguments) without an intervening "
5539 "resumeFill() call before you may call this method.");
5541 (!
X.isConstantStride () || !
Y.isConstantStride (),
5542 std::runtime_error,
"X and Y must be constant stride.");
5547 std::runtime_error,
"X and Y may not alias one another.");
5553 maxRowImbalance = getNodeMaxNumRowEntries() - (getNodeNumEntries() / nrows);
5561 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5566 Teuchos::ETransp
mode,
5571 const char fnName[] =
"Tpetra::CrsMatrix::apply";
5574 (! isFillComplete (), std::runtime_error,
5575 fnName <<
": Cannot call apply() until fillComplete() "
5576 "has been called.");
5578 if (
mode == Teuchos::NO_TRANS) {
5583 ProfilingRegion
regionTranspose (
"Tpetra::CrsMatrix::apply (transpose)");
5590 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5599 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5601 Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> >
5610 (! this->isFillComplete (), std::runtime_error,
"This matrix (the source "
5611 "of the conversion) is not fill complete. You must first call "
5612 "fillComplete() (possibly with the domain and range Map) without an "
5613 "intervening call to resumeFill(), before you may call this method.");
5619 using ::Tpetra::Details::copyConvert;
5620 copyConvert (
newMatrix->getLocalMatrixDevice ().values,
5621 this->getLocalMatrixDevice ().values);
5625 newMatrix->fillComplete (this->getDomainMap (), this->getRangeMap ());
5631 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5639 const char err[] =
"Internal state is not consistent. "
5640 "Please report this bug to the Tpetra developers.";
5645 (staticGraph_.is_null (), std::logic_error,
err);
5650 (! myGraph_.is_null () && myGraph_ != staticGraph_,
5651 std::logic_error,
err);
5654 (isFillComplete () && ! staticGraph_->isFillComplete (),
5655 std::logic_error,
err <<
" Specifically, the matrix is fill complete, "
5656 "but its graph is NOT fill complete.");
5660 (staticGraph_->indicesAreAllocated () &&
5661 staticGraph_->getNodeAllocationSize() > 0 &&
5662 staticGraph_->getNodeNumRows() > 0 &&
5663 valuesUnpacked_wdv.extent (0) == 0,
5664 std::logic_error,
err);
5668 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5673 std::ostringstream
os;
5675 os <<
"Tpetra::CrsMatrix (Kokkos refactor): {";
5679 if (isFillComplete ()) {
5680 os <<
"isFillComplete: true"
5681 <<
", global dimensions: [" << getGlobalNumRows () <<
", "
5682 << getGlobalNumCols () <<
"]"
5683 <<
", global number of entries: " << getGlobalNumEntries ()
5687 os <<
"isFillComplete: false"
5688 <<
", global dimensions: [" << getGlobalNumRows () <<
", "
5689 << getGlobalNumCols () <<
"]}";
5694 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5698 const Teuchos::EVerbosityLevel
verbLevel)
const
5702 using Teuchos::ArrayView;
5703 using Teuchos::Comm;
5705 using Teuchos::TypeNameTraits;
5706 using Teuchos::VERB_DEFAULT;
5707 using Teuchos::VERB_NONE;
5708 using Teuchos::VERB_LOW;
5709 using Teuchos::VERB_MEDIUM;
5710 using Teuchos::VERB_HIGH;
5711 using Teuchos::VERB_EXTREME;
5723 const int myRank = comm->getRank();
5724 const int numProcs = comm->getSize();
5726 for (
size_t dec=10;
dec<getGlobalNumRows();
dec *= 10) {
5729 width = std::max<size_t> (
width,
static_cast<size_t> (11)) + 2;
5739 out <<
"Tpetra::CrsMatrix (Kokkos refactor):" <<
endl;
5748 out <<
"Template parameters:" <<
endl;
5755 if (isFillComplete()) {
5756 out <<
"isFillComplete: true" <<
endl
5757 <<
"Global dimensions: [" << getGlobalNumRows () <<
", "
5758 << getGlobalNumCols () <<
"]" <<
endl
5759 <<
"Global number of entries: " << getGlobalNumEntries () <<
endl
5760 <<
endl <<
"Global max number of entries in a row: "
5761 << getGlobalMaxNumRowEntries () <<
endl;
5764 out <<
"isFillComplete: false" <<
endl
5765 <<
"Global dimensions: [" << getGlobalNumRows () <<
", "
5766 << getGlobalNumCols () <<
"]" <<
endl;
5778 if (getRowMap ().
is_null ()) {
5787 getRowMap ()->describe (
out,
vl);
5792 out <<
"Column Map: ";
5794 if (getColMap ().
is_null ()) {
5798 }
else if (getColMap () == getRowMap ()) {
5800 out <<
"same as row Map" <<
endl;
5806 getColMap ()->describe (
out,
vl);
5811 out <<
"Domain Map: ";
5813 if (getDomainMap ().
is_null ()) {
5817 }
else if (getDomainMap () == getRowMap ()) {
5819 out <<
"same as row Map" <<
endl;
5821 }
else if (getDomainMap () == getColMap ()) {
5823 out <<
"same as column Map" <<
endl;
5829 getDomainMap ()->describe (
out,
vl);
5834 out <<
"Range Map: ";
5836 if (getRangeMap ().
is_null ()) {
5840 }
else if (getRangeMap () == getDomainMap ()) {
5842 out <<
"same as domain Map" <<
endl;
5844 }
else if (getRangeMap () == getRowMap ()) {
5846 out <<
"same as row Map" <<
endl;
5852 getRangeMap ()->describe (
out,
vl);
5860 if (! staticGraph_->indicesAreAllocated ()) {
5861 out <<
"Graph indices not allocated" <<
endl;
5864 out <<
"Number of allocated entries: "
5865 << staticGraph_->getNodeAllocationSize () <<
endl;
5867 out <<
"Number of entries: " << getNodeNumEntries () <<
endl
5868 <<
"Max number of entries per row: " << getNodeMaxNumRowEntries ()
5884 out << std::setw(
width) <<
"Proc Rank"
5885 << std::setw(
width) <<
"Global Row"
5886 << std::setw(
width) <<
"Num Entries";
5888 out << std::setw(
width) <<
"(Index,Value)";
5891 for (
size_t r = 0;
r < getNodeNumRows (); ++
r) {
5892 const size_t nE = getNumEntriesInLocalRow(
r);
5898 if (isGloballyIndexed()) {
5899 global_inds_host_view_type
rowinds;
5900 values_host_view_type
rowvals;
5902 for (
size_t j = 0;
j <
nE; ++
j) {
5908 else if (isLocallyIndexed()) {
5909 local_inds_host_view_type
rowinds;
5910 values_host_view_type
rowvals;
5912 for (
size_t j=0;
j <
nE; ++
j) {
5913 out <<
" (" << getColMap()->getGlobalElement(
rowinds[
j])
5930 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5947 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5951 const typename crs_graph_type::padding_type&
padding,
5957 using LO = local_ordinal_type;
5958 using row_ptrs_type =
5959 typename local_graph_device_type::row_map_type::non_const_type;
5961 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
5964 ". Please report this bug to the Tpetra developers.";
5965 ProfilingRegion
regionCAP(
"Tpetra::CrsMatrix::applyCrsPadding");
5967 std::unique_ptr<std::string>
prefix;
5970 std::ostringstream
os;
5974 std::cerr <<
os.str();
5976 const int myRank = ! verbose ? -1 : [&] () {
5977 auto map = this->getMap();
5978 if (map.is_null()) {
5981 auto comm = map->getComm();
5982 if (comm.is_null()) {
5985 return comm->getRank();
5989 if (! myGraph_->indicesAreAllocated()) {
5991 std::ostringstream os;
5992 os << *prefix <<
"Call allocateIndices" << endl;
5993 std::cerr << os.str();
5995 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
6007 std::ostringstream os;
6008 os << *prefix <<
"Allocate row_ptrs_beg: "
6009 << myGraph_->rowPtrsUnpacked_host_.extent(0) << endl;
6010 std::cerr << os.str();
6012 using Kokkos::view_alloc;
6013 using Kokkos::WithoutInitializing;
6014 row_ptrs_type row_ptr_beg(
6015 view_alloc(
"row_ptr_beg", WithoutInitializing),
6016 myGraph_->rowPtrsUnpacked_dev_.extent(0));
6017 Kokkos::deep_copy(row_ptr_beg, myGraph_->rowPtrsUnpacked_dev_);
6019 const size_t N = row_ptr_beg.extent(0) == 0 ? size_t(0) :
6020 size_t(row_ptr_beg.extent(0) - 1);
6022 std::ostringstream os;
6023 os << *prefix <<
"Allocate row_ptrs_end: " << N << endl;
6024 std::cerr << os.str();
6026 row_ptrs_type row_ptr_end(
6027 view_alloc(
"row_ptr_end", WithoutInitializing), N);
6029 row_ptrs_type num_row_entries_d;
6031 const bool refill_num_row_entries =
6032 myGraph_->k_numRowEntries_.extent(0) != 0;
6034 if (refill_num_row_entries) {
6037 num_row_entries_d = create_mirror_view_and_copy(memory_space(),
6038 myGraph_->k_numRowEntries_);
6039 Kokkos::parallel_for
6040 (
"Fill end row pointers", range_policy(0, N),
6041 KOKKOS_LAMBDA (
const size_t i) {
6042 row_ptr_end(i) = row_ptr_beg(i) + num_row_entries_d(i);
6049 Kokkos::parallel_for
6050 (
"Fill end row pointers", range_policy(0, N),
6051 KOKKOS_LAMBDA (
const size_t i) {
6052 row_ptr_end(i) = row_ptr_beg(i+1);
6056 if (myGraph_->isGloballyIndexed()) {
6058 myGraph_->gblInds_wdv,
6059 valuesUnpacked_wdv, padding, myRank, verbose);
6060 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
6061 const auto newColIndsLen = myGraph_->gblInds_wdv.extent(0);
6062 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6063 (newValuesLen != newColIndsLen, std::logic_error,
6064 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
6065 <<
" != myGraph_->gblInds_wdv.extent(0)=" << newColIndsLen
6070 myGraph_->lclIndsUnpacked_wdv,
6071 valuesUnpacked_wdv, padding, myRank, verbose);
6072 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
6073 const auto newColIndsLen = myGraph_->lclIndsUnpacked_wdv.extent(0);
6074 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6075 (newValuesLen != newColIndsLen, std::logic_error,
6076 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
6077 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0)=" << newColIndsLen
6081 if (refill_num_row_entries) {
6082 Kokkos::parallel_for
6083 (
"Fill num entries", range_policy(0, N),
6084 KOKKOS_LAMBDA (
const size_t i) {
6085 num_row_entries_d(i) = row_ptr_end(i) - row_ptr_beg(i);
6087 Kokkos::deep_copy(myGraph_->k_numRowEntries_, num_row_entries_d);
6091 std::ostringstream os;
6092 os << *prefix <<
"Assign myGraph_->rowPtrsUnpacked_; "
6093 <<
"old size: " << myGraph_->rowPtrsUnpacked_host_.extent(0)
6094 <<
", new size: " << row_ptr_beg.extent(0) << endl;
6095 std::cerr << os.str();
6096 TEUCHOS_ASSERT( myGraph_->rowPtrsUnpacked_host_.extent(0) ==
6097 row_ptr_beg.extent(0) );
6099 myGraph_->setRowPtrsUnpacked(row_ptr_beg);
6102 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6104 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6105 copyAndPermuteStaticGraph(
6106 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
6107 const size_t numSameIDs,
6108 const LocalOrdinal permuteToLIDs[],
6109 const LocalOrdinal permuteFromLIDs[],
6110 const size_t numPermutes)
6112 using Details::ProfilingRegion;
6113 using Teuchos::Array;
6114 using Teuchos::ArrayView;
6116 using LO = LocalOrdinal;
6117 using GO = GlobalOrdinal;
6118 const char tfecfFuncName[] =
"copyAndPermuteStaticGraph";
6119 const char suffix[] =
6120 " Please report this bug to the Tpetra developers.";
6121 ProfilingRegion regionCAP
6122 (
"Tpetra::CrsMatrix::copyAndPermuteStaticGraph");
6126 std::unique_ptr<std::string> prefix;
6128 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
6129 std::ostringstream os;
6130 os << *prefix <<
"Start" << endl;
6132 const char*
const prefix_raw =
6133 verbose ? prefix.get()->c_str() :
nullptr;
6135 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
6140 const map_type& srcRowMap = * (srcMat.getRowMap ());
6141 nonconst_global_inds_host_view_type rowInds;
6142 nonconst_values_host_view_type rowVals;
6143 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
6144 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
6148 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
6149 const GO targetGID = sourceGID;
6151 ArrayView<const GO>rowIndsConstView;
6152 ArrayView<const Scalar> rowValsConstView;
6154 if (sourceIsLocallyIndexed) {
6155 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
6156 if (rowLength >
static_cast<size_t> (rowInds.size())) {
6157 Kokkos::resize(rowInds,rowLength);
6158 Kokkos::resize(rowVals,rowLength);
6162 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
6163 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
6168 size_t checkRowLength = 0;
6169 srcMat.getGlobalRowCopy (sourceGID, rowIndsView,
6170 rowValsView, checkRowLength);
6172 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6173 (rowLength != checkRowLength, std::logic_error,
"For "
6174 "global row index " << sourceGID <<
", the source "
6175 "matrix's getNumEntriesInGlobalRow returns a row length "
6176 "of " << rowLength <<
", but getGlobalRowCopy reports "
6177 "a row length of " << checkRowLength <<
"." << suffix);
6184 rowIndsConstView = Teuchos::ArrayView<const GO> (
6185 rowIndsView.data(), rowIndsView.extent(0),
6186 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6187 rowValsConstView = Teuchos::ArrayView<const Scalar> (
6188 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
6189 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6194 global_inds_host_view_type rowIndsView;
6195 values_host_view_type rowValsView;
6196 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
6201 rowIndsConstView = Teuchos::ArrayView<const GO> (
6202 rowIndsView.data(), rowIndsView.extent(0),
6203 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6204 rowValsConstView = Teuchos::ArrayView<const Scalar> (
6205 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
6206 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6214 combineGlobalValues(targetGID, rowIndsConstView,
6216 prefix_raw, debug, verbose);
6220 std::ostringstream os;
6221 os << *prefix <<
"Do permutes" << endl;
6224 const map_type& tgtRowMap = * (this->getRowMap ());
6225 for (
size_t p = 0; p < numPermutes; ++p) {
6226 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
6227 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
6229 ArrayView<const GO> rowIndsConstView;
6230 ArrayView<const Scalar> rowValsConstView;
6232 if (sourceIsLocallyIndexed) {
6233 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
6234 if (rowLength >
static_cast<size_t> (rowInds.size ())) {
6235 Kokkos::resize(rowInds,rowLength);
6236 Kokkos::resize(rowVals,rowLength);
6240 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
6241 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
6246 size_t checkRowLength = 0;
6247 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
6248 rowValsView, checkRowLength);
6250 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6251 (rowLength != checkRowLength, std::logic_error,
"For "
6252 "source matrix global row index " << sourceGID <<
", "
6253 "getNumEntriesInGlobalRow returns a row length of " <<
6254 rowLength <<
", but getGlobalRowCopy a row length of "
6255 << checkRowLength <<
"." << suffix);
6262 rowIndsConstView = Teuchos::ArrayView<const GO> (
6263 rowIndsView.data(), rowIndsView.extent(0),
6264 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6265 rowValsConstView = Teuchos::ArrayView<const Scalar> (
6266 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
6267 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6272 global_inds_host_view_type rowIndsView;
6273 values_host_view_type rowValsView;
6274 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
6279 rowIndsConstView = Teuchos::ArrayView<const GO> (
6280 rowIndsView.data(), rowIndsView.extent(0),
6281 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6282 rowValsConstView = Teuchos::ArrayView<const Scalar> (
6283 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
6284 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6289 combineGlobalValues(targetGID, rowIndsConstView,
6291 prefix_raw, debug, verbose);
6295 std::ostringstream os;
6296 os << *prefix <<
"Done" << endl;
6300 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6302 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6303 copyAndPermuteNonStaticGraph(
6304 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
6305 const size_t numSameIDs,
6306 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs_dv,
6307 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs_dv,
6308 const size_t numPermutes)
6310 using Details::ProfilingRegion;
6311 using Teuchos::Array;
6312 using Teuchos::ArrayView;
6314 using LO = LocalOrdinal;
6315 using GO = GlobalOrdinal;
6316 const char tfecfFuncName[] =
"copyAndPermuteNonStaticGraph";
6317 const char suffix[] =
6318 " Please report this bug to the Tpetra developers.";
6319 ProfilingRegion regionCAP
6320 (
"Tpetra::CrsMatrix::copyAndPermuteNonStaticGraph");
6324 std::unique_ptr<std::string> prefix;
6326 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
6327 std::ostringstream os;
6328 os << *prefix <<
"Start" << endl;
6330 const char*
const prefix_raw =
6331 verbose ? prefix.get()->c_str() :
nullptr;
6334 using row_graph_type = RowGraph<LO, GO, Node>;
6335 const row_graph_type& srcGraph = *(srcMat.getGraph());
6337 myGraph_->computeCrsPadding(srcGraph, numSameIDs,
6338 permuteToLIDs_dv, permuteFromLIDs_dv, verbose);
6339 applyCrsPadding(*padding, verbose);
6341 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
6346 const map_type& srcRowMap = * (srcMat.getRowMap ());
6347 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
6348 using gids_type = nonconst_global_inds_host_view_type;
6349 using vals_type = nonconst_values_host_view_type;
6352 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
6356 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
6357 const GO targetGID = sourceGID;
6359 ArrayView<const GO> rowIndsConstView;
6360 ArrayView<const Scalar> rowValsConstView;
6362 if (sourceIsLocallyIndexed) {
6364 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
6365 if (rowLength >
static_cast<size_t> (rowInds.extent(0))) {
6366 Kokkos::resize(rowInds,rowLength);
6367 Kokkos::resize(rowVals,rowLength);
6371 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
6372 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
6377 size_t checkRowLength = 0;
6378 srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView,
6381 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6382 (rowLength != checkRowLength, std::logic_error,
": For "
6383 "global row index " << sourceGID <<
", the source "
6384 "matrix's getNumEntriesInGlobalRow returns a row length "
6385 "of " << rowLength <<
", but getGlobalRowCopy reports "
6386 "a row length of " << checkRowLength <<
"." << suffix);
6388 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
6389 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
6392 global_inds_host_view_type rowIndsView;
6393 values_host_view_type rowValsView;
6394 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
6400 rowIndsConstView = Teuchos::ArrayView<const GO> (
6401 rowIndsView.data(), rowIndsView.extent(0),
6402 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6403 rowValsConstView = Teuchos::ArrayView<const Scalar> (
6404 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
6405 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6411 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
6412 rowValsConstView, prefix_raw, debug, verbose);
6416 std::ostringstream os;
6417 os << *prefix <<
"Do permutes" << endl;
6419 const LO*
const permuteFromLIDs = permuteFromLIDs_dv.view_host().data();
6420 const LO*
const permuteToLIDs = permuteToLIDs_dv.view_host().data();
6422 const map_type& tgtRowMap = * (this->getRowMap ());
6423 for (
size_t p = 0; p < numPermutes; ++p) {
6424 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
6425 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
6427 ArrayView<const GO> rowIndsConstView;
6428 ArrayView<const Scalar> rowValsConstView;
6430 if (sourceIsLocallyIndexed) {
6431 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
6432 if (rowLength >
static_cast<size_t> (rowInds.extent(0))) {
6433 Kokkos::resize(rowInds,rowLength);
6434 Kokkos::resize(rowVals,rowLength);
6438 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
6439 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
6444 size_t checkRowLength = 0;
6445 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
6446 rowValsView, checkRowLength);
6448 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6449 (rowLength != checkRowLength, std::logic_error,
"For "
6450 "source matrix global row index " << sourceGID <<
", "
6451 "getNumEntriesInGlobalRow returns a row length of " <<
6452 rowLength <<
", but getGlobalRowCopy a row length of "
6453 << checkRowLength <<
"." << suffix);
6455 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
6456 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
6459 global_inds_host_view_type rowIndsView;
6460 values_host_view_type rowValsView;
6461 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
6467 rowIndsConstView = Teuchos::ArrayView<const GO> (
6468 rowIndsView.data(), rowIndsView.extent(0),
6469 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6470 rowValsConstView = Teuchos::ArrayView<const Scalar> (
6471 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
6472 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6478 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
6479 rowValsConstView, prefix_raw, debug, verbose);
6483 std::ostringstream os;
6484 os << *prefix <<
"Done" << endl;
6488 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6490 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6492 const SrcDistObject& srcObj,
6493 const size_t numSameIDs,
6494 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
6495 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
6498 using Details::Behavior;
6500 using Details::ProfilingRegion;
6504 const char tfecfFuncName[] =
"copyAndPermute: ";
6505 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::copyAndPermute");
6507 const bool verbose = Behavior::verbose(
"CrsMatrix");
6508 std::unique_ptr<std::string> prefix;
6510 prefix = this->
createPrefix(
"CrsMatrix",
"copyAndPermute");
6511 std::ostringstream os;
6512 os << *prefix << endl
6513 << *prefix <<
" numSameIDs: " << numSameIDs << endl
6514 << *prefix <<
" numPermute: " << permuteToLIDs.extent(0)
6523 <<
"isStaticGraph: " << (isStaticGraph() ?
"true" :
"false")
6525 std::cerr << os.str ();
6528 const auto numPermute = permuteToLIDs.extent (0);
6529 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6530 (numPermute != permuteFromLIDs.extent (0),
6531 std::invalid_argument,
"permuteToLIDs.extent(0) = "
6532 << numPermute <<
"!= permuteFromLIDs.extent(0) = "
6533 << permuteFromLIDs.extent (0) <<
".");
6537 using RMT = RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
6538 const RMT& srcMat =
dynamic_cast<const RMT&
> (srcObj);
6539 if (isStaticGraph ()) {
6540 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
6541 auto permuteToLIDs_h = permuteToLIDs.view_host ();
6542 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
6543 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
6545 copyAndPermuteStaticGraph(srcMat, numSameIDs,
6546 permuteToLIDs_h.data(),
6547 permuteFromLIDs_h.data(),
6551 copyAndPermuteNonStaticGraph(srcMat, numSameIDs, permuteToLIDs,
6552 permuteFromLIDs, numPermute);
6556 std::ostringstream os;
6557 os << *prefix <<
"Done" << endl;
6558 std::cerr << os.str();
6562 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6564 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6566 (
const SrcDistObject& source,
6567 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6568 Kokkos::DualView<char*, buffer_device_type>& exports,
6569 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6570 size_t& constantNumPackets)
6572 using Details::Behavior;
6574 using Details::ProfilingRegion;
6575 using Teuchos::outArg;
6576 using Teuchos::REDUCE_MAX;
6577 using Teuchos::reduceAll;
6579 typedef LocalOrdinal LO;
6580 typedef GlobalOrdinal GO;
6581 const char tfecfFuncName[] =
"packAndPrepare: ";
6582 ProfilingRegion regionPAP (
"Tpetra::CrsMatrix::packAndPrepare");
6584 const bool debug = Behavior::debug(
"CrsMatrix");
6585 const bool verbose = Behavior::verbose(
"CrsMatrix");
6588 Teuchos::RCP<const Teuchos::Comm<int> > pComm = this->getComm ();
6589 if (pComm.is_null ()) {
6592 const Teuchos::Comm<int>& comm = *pComm;
6593 const int myRank = comm.getSize ();
6595 std::unique_ptr<std::string> prefix;
6597 prefix = this->
createPrefix(
"CrsMatrix",
"packAndPrepare");
6598 std::ostringstream os;
6599 os << *prefix <<
"Start" << endl
6609 std::cerr << os.str ();
6632 std::ostringstream msg;
6635 using crs_matrix_type = CrsMatrix<Scalar, LO, GO, Node>;
6636 const crs_matrix_type* srcCrsMat =
6637 dynamic_cast<const crs_matrix_type*
> (&source);
6638 if (srcCrsMat !=
nullptr) {
6640 std::ostringstream os;
6641 os << *prefix <<
"Source matrix same (CrsMatrix) type as target; "
6642 "calling packNew" << endl;
6643 std::cerr << os.str ();
6646 srcCrsMat->packNew (exportLIDs, exports, numPacketsPerLID,
6647 constantNumPackets);
6649 catch (std::exception& e) {
6651 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
6655 using Kokkos::HostSpace;
6656 using Kokkos::subview;
6657 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6658 using range_type = Kokkos::pair<size_t, size_t>;
6661 std::ostringstream os;
6662 os << *prefix <<
"Source matrix NOT same (CrsMatrix) type as target"
6664 std::cerr << os.str ();
6667 const row_matrix_type* srcRowMat =
6668 dynamic_cast<const row_matrix_type*
> (&source);
6669 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6670 (srcRowMat ==
nullptr, std::invalid_argument,
6671 "The source object of the Import or Export operation is neither a "
6672 "CrsMatrix (with the same template parameters as the target object), "
6673 "nor a RowMatrix (with the same first four template parameters as the "
6684 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6685 auto exportLIDs_h = exportLIDs.view_host ();
6686 Teuchos::ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
6687 exportLIDs_h.size ());
6691 Teuchos::Array<char> exports_a;
6697 numPacketsPerLID.clear_sync_state ();
6698 numPacketsPerLID.modify_host ();
6699 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6700 Teuchos::ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
6701 numPacketsPerLID_h.size ());
6706 srcRowMat->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
6707 constantNumPackets);
6709 catch (std::exception& e) {
6711 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
6715 const size_t newAllocSize =
static_cast<size_t> (exports_a.size ());
6716 if (
static_cast<size_t> (exports.extent (0)) < newAllocSize) {
6717 const std::string oldLabel = exports.d_view.label ();
6718 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6719 exports = exports_type (newLabel, newAllocSize);
6724 exports.modify_host();
6726 auto exports_h = exports.view_host ();
6727 auto exports_h_sub = subview (exports_h, range_type (0, newAllocSize));
6731 typedef typename exports_type::t_host::execution_space HES;
6732 typedef Kokkos::Device<HES, HostSpace> host_device_type;
6733 Kokkos::View<const char*, host_device_type>
6734 exports_a_kv (exports_a.getRawPtr (), newAllocSize);
6735 Kokkos::deep_copy (exports_h_sub, exports_a_kv);
6740 reduceAll<int, int> (comm, REDUCE_MAX, lclBad, outArg (gblBad));
6743 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6744 (
true, std::logic_error,
"packNew() or pack() threw an exception on "
6745 "one or more participating processes.");
6749 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6750 (lclBad != 0, std::logic_error,
"packNew threw an exception on one "
6751 "or more participating processes. Here is this process' error "
6752 "message: " << msg.str ());
6756 std::ostringstream os;
6757 os << *prefix <<
"packAndPrepare: Done!" << endl
6767 std::cerr << os.str ();
6771 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6773 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6774 packRow (
char exports[],
6775 const size_t offset,
6776 const size_t numEnt,
6777 const GlobalOrdinal gidsIn[],
6778 const impl_scalar_type valsIn[],
6779 const size_t numBytesPerValue)
const
6782 using Kokkos::subview;
6784 typedef LocalOrdinal LO;
6785 typedef GlobalOrdinal GO;
6786 typedef impl_scalar_type ST;
6794 const LO numEntLO =
static_cast<size_t> (numEnt);
6796 const size_t numEntBeg = offset;
6797 const size_t numEntLen = PackTraits<LO>::packValueCount (numEntLO);
6798 const size_t gidsBeg = numEntBeg + numEntLen;
6799 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6800 const size_t valsBeg = gidsBeg + gidsLen;
6801 const size_t valsLen = numEnt * numBytesPerValue;
6803 char*
const numEntOut = exports + numEntBeg;
6804 char*
const gidsOut = exports + gidsBeg;
6805 char*
const valsOut = exports + valsBeg;
6807 size_t numBytesOut = 0;
6809 numBytesOut += PackTraits<LO>::packValue (numEntOut, numEntLO);
6812 Kokkos::pair<int, size_t> p;
6813 p = PackTraits<GO>::packArray (gidsOut, gidsIn, numEnt);
6814 errorCode += p.first;
6815 numBytesOut += p.second;
6817 p = PackTraits<ST>::packArray (valsOut, valsIn, numEnt);
6818 errorCode += p.first;
6819 numBytesOut += p.second;
6822 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6823 TEUCHOS_TEST_FOR_EXCEPTION
6824 (numBytesOut != expectedNumBytes, std::logic_error,
"packRow: "
6825 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6826 << expectedNumBytes <<
".");
6827 TEUCHOS_TEST_FOR_EXCEPTION
6828 (errorCode != 0, std::runtime_error,
"packRow: "
6829 "PackTraits::packArray returned a nonzero error code");
6834 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6836 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6837 unpackRow (GlobalOrdinal gidsOut[],
6838 impl_scalar_type valsOut[],
6839 const char imports[],
6840 const size_t offset,
6841 const size_t numBytes,
6842 const size_t numEnt,
6843 const size_t numBytesPerValue)
6846 using Kokkos::subview;
6848 typedef LocalOrdinal LO;
6849 typedef GlobalOrdinal GO;
6850 typedef impl_scalar_type ST;
6852 Details::ProfilingRegion region_upack_row(
6853 "Tpetra::CrsMatrix::unpackRow",
6857 if (numBytes == 0) {
6860 const int myRank = this->getMap ()->getComm ()->getRank ();
6861 TEUCHOS_TEST_FOR_EXCEPTION
6862 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6863 "unpackRow: The number of bytes to unpack numBytes=0, but the "
6864 "number of entries to unpack (as reported by numPacketsPerLID) "
6865 "for this row numEnt=" << numEnt <<
" != 0.");
6870 if (numEnt == 0 && numBytes != 0) {
6871 const int myRank = this->getMap ()->getComm ()->getRank ();
6872 TEUCHOS_TEST_FOR_EXCEPTION
6873 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6874 "unpackRow: The number of entries to unpack (as reported by "
6875 "numPacketsPerLID) numEnt=0, but the number of bytes to unpack "
6876 "numBytes=" << numBytes <<
" != 0.");
6882 const size_t numEntBeg = offset;
6883 const size_t numEntLen = PackTraits<LO>::packValueCount (lid);
6884 const size_t gidsBeg = numEntBeg + numEntLen;
6885 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6886 const size_t valsBeg = gidsBeg + gidsLen;
6887 const size_t valsLen = numEnt * numBytesPerValue;
6889 const char*
const numEntIn = imports + numEntBeg;
6890 const char*
const gidsIn = imports + gidsBeg;
6891 const char*
const valsIn = imports + valsBeg;
6893 size_t numBytesOut = 0;
6896 numBytesOut += PackTraits<LO>::unpackValue (numEntOut, numEntIn);
6897 if (
static_cast<size_t> (numEntOut) != numEnt ||
6898 numEntOut ==
static_cast<LO
> (0)) {
6899 const int myRank = this->getMap ()->getComm ()->getRank ();
6900 std::ostringstream os;
6901 os <<
"(Proc " << myRank <<
") CrsMatrix::unpackRow: ";
6902 bool firstErrorCondition =
false;
6903 if (
static_cast<size_t> (numEntOut) != numEnt) {
6904 os <<
"Number of entries from numPacketsPerLID numEnt=" << numEnt
6905 <<
" does not equal number of entries unpacked from imports "
6906 "buffer numEntOut=" << numEntOut <<
".";
6907 firstErrorCondition =
true;
6909 if (numEntOut ==
static_cast<LO
> (0)) {
6910 if (firstErrorCondition) {
6913 os <<
"Number of entries unpacked from imports buffer numEntOut=0, "
6914 "but number of bytes to unpack for this row numBytes=" << numBytes
6915 <<
" != 0. This should never happen, since packRow should only "
6916 "ever pack rows with a nonzero number of entries. In this case, "
6917 "the number of entries from numPacketsPerLID is numEnt=" << numEnt
6920 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, os.str ());
6924 Kokkos::pair<int, size_t> p;
6925 p = PackTraits<GO>::unpackArray (gidsOut, gidsIn, numEnt);
6926 errorCode += p.first;
6927 numBytesOut += p.second;
6929 p = PackTraits<ST>::unpackArray (valsOut, valsIn, numEnt);
6930 errorCode += p.first;
6931 numBytesOut += p.second;
6934 TEUCHOS_TEST_FOR_EXCEPTION
6935 (numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = "
6936 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
6938 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6939 TEUCHOS_TEST_FOR_EXCEPTION
6940 (numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: "
6941 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6942 << expectedNumBytes <<
".");
6944 TEUCHOS_TEST_FOR_EXCEPTION
6945 (errorCode != 0, std::runtime_error,
"unpackRow: "
6946 "PackTraits::unpackArray returned a nonzero error code");
6951 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6953 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6954 allocatePackSpaceNew (Kokkos::DualView<char*, buffer_device_type>& exports,
6955 size_t& totalNumEntries,
6956 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs)
const
6958 using Details::Behavior;
6961 typedef impl_scalar_type IST;
6962 typedef LocalOrdinal LO;
6963 typedef GlobalOrdinal GO;
6969 const bool verbose = Behavior::verbose(
"CrsMatrix");
6970 std::unique_ptr<std::string> prefix;
6972 prefix = this->
createPrefix(
"CrsMatrix",
"allocatePackSpaceNew");
6973 std::ostringstream os;
6974 os << *prefix <<
"Before:"
6982 std::cerr << os.str ();
6987 const LO numExportLIDs =
static_cast<LO
> (exportLIDs.extent (0));
6989 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6990 auto exportLIDs_h = exportLIDs.view_host ();
6993 totalNumEntries = 0;
6994 for (LO i = 0; i < numExportLIDs; ++i) {
6995 const LO lclRow = exportLIDs_h[i];
6996 size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
6999 if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
7002 totalNumEntries += curNumEntries;
7013 const size_t allocSize =
7014 static_cast<size_t> (numExportLIDs) *
sizeof (LO) +
7015 totalNumEntries * (
sizeof (IST) +
sizeof (GO));
7016 if (
static_cast<size_t> (exports.extent (0)) < allocSize) {
7017 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
7019 const std::string oldLabel = exports.d_view.label ();
7020 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
7021 exports = exports_type (newLabel, allocSize);
7025 std::ostringstream os;
7026 os << *prefix <<
"After:"
7034 std::cerr << os.str ();
7038 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7041 packNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
exportLIDs,
7042 Kokkos::DualView<char*, buffer_device_type>& exports,
7048 if (this->isStaticGraph ()) {
7049 using ::Tpetra::Details::packCrsMatrixNew;
7059 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7063 Kokkos::DualView<char*, buffer_device_type>& exports,
7075 using ST = impl_scalar_type;
7078 const bool verbose = Behavior::verbose(
"CrsMatrix");
7079 std::unique_ptr<std::string>
prefix;
7081 prefix = this->createPrefix(
"CrsMatrix",
"packNonStaticNew");
7082 std::ostringstream
os;
7084 std::cerr <<
os.str ();
7087 const size_t numExportLIDs =
static_cast<size_t> (exportLIDs.extent (0));
7088 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7089 (numExportLIDs !=
static_cast<size_t> (numPacketsPerLID.extent (0)),
7090 std::invalid_argument,
"exportLIDs.size() = " << numExportLIDs
7091 <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.extent (0)
7097 constantNumPackets = 0;
7102 size_t totalNumEntries = 0;
7103 this->allocatePackSpaceNew (exports, totalNumEntries, exportLIDs);
7104 const size_t bufSize =
static_cast<size_t> (exports.extent (0));
7107 exports.clear_sync_state();
7108 exports.modify_host();
7109 auto exports_h = exports.view_host ();
7111 std::ostringstream os;
7112 os << *prefix <<
"After marking exports as modified on host, "
7113 << dualViewStatusToString (exports,
"exports") << endl;
7114 std::cerr << os.str ();
7118 auto exportLIDs_h = exportLIDs.view_host ();
7121 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->clear_sync_state();
7122 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->modify_host();
7123 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
7128 auto maxRowNumEnt = this->getNodeMaxNumRowEntries();
7132 typename global_inds_host_view_type::non_const_type gidsIn_k;
7133 if (this->isLocallyIndexed()) {
7135 typename global_inds_host_view_type::non_const_type(
"packGids",
7140 for (
size_t i = 0; i < numExportLIDs; ++i) {
7141 const LO lclRow = exportLIDs_h[i];
7145 numEnt = this->getNumEntriesInLocalRow (lclRow);
7152 numPacketsPerLID_h[i] = 0;
7156 if (this->isLocallyIndexed ()) {
7157 typename global_inds_host_view_type::non_const_type gidsIn;
7158 values_host_view_type valsIn;
7162 local_inds_host_view_type lidsIn;
7163 this->getLocalRowView (lclRow, lidsIn, valsIn);
7164 const map_type&
colMap = * (this->getColMap ());
7165 for (
size_t k = 0; k < numEnt; ++k) {
7166 gidsIn_k[k] =
colMap.getGlobalElement (lidsIn[k]);
7168 gidsIn = Kokkos::subview(gidsIn_k, Kokkos::make_pair(GO(0),GO(numEnt)));
7170 const size_t numBytesPerValue =
7171 PackTraits<ST>::packValueCount (valsIn[0]);
7172 numBytes = this->
packRow (exports_h.data (), offset, numEnt,
7173 gidsIn.data (), valsIn.data (),
7176 else if (this->isGloballyIndexed ()) {
7177 global_inds_host_view_type gidsIn;
7178 values_host_view_type valsIn;
7184 const map_type&
rowMap = * (this->getRowMap ());
7185 const GO gblRow =
rowMap.getGlobalElement (lclRow);
7186 this->getGlobalRowView (gblRow, gidsIn, valsIn);
7188 const size_t numBytesPerValue =
7189 PackTraits<ST>::packValueCount (valsIn[0]);
7190 numBytes = this->
packRow (exports_h.data (), offset, numEnt,
7191 gidsIn.data (), valsIn.data (),
7198 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7199 (offset > bufSize || offset + numBytes > bufSize, std::logic_error,
7200 "First invalid offset into 'exports' pack buffer at index i = " << i
7201 <<
". exportLIDs_h[i]: " << exportLIDs_h[i] <<
", bufSize: " <<
7202 bufSize <<
", offset: " << offset <<
", numBytes: " << numBytes <<
7207 numPacketsPerLID_h[i] = numBytes;
7212 std::ostringstream os;
7213 os << *prefix <<
"Tpetra::CrsMatrix::packNonStaticNew: After:" << endl
7220 std::cerr << os.str ();
7224 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7226 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7227 combineGlobalValuesRaw(
const LocalOrdinal lclRow,
7228 const LocalOrdinal numEnt,
7229 const impl_scalar_type vals[],
7230 const GlobalOrdinal cols[],
7232 const char*
const prefix,
7236 using GO = GlobalOrdinal;
7240 const GO gblRow = myGraph_->rowMap_->getGlobalElement(lclRow);
7241 Teuchos::ArrayView<const GO> cols_av
7242 (numEnt == 0 ?
nullptr : cols, numEnt);
7243 Teuchos::ArrayView<const Scalar> vals_av
7244 (numEnt == 0 ?
nullptr : reinterpret_cast<const Scalar*> (vals), numEnt);
7249 combineGlobalValues(gblRow, cols_av, vals_av, combMode,
7250 prefix, debug, verbose);
7254 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7256 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7257 combineGlobalValues(
7258 const GlobalOrdinal globalRowIndex,
7259 const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
7260 const Teuchos::ArrayView<const Scalar>& values,
7262 const char*
const prefix,
7266 const char tfecfFuncName[] =
"combineGlobalValues: ";
7268 if (isStaticGraph ()) {
7272 if (combineMode ==
ADD) {
7273 sumIntoGlobalValues (globalRowIndex, columnIndices, values);
7275 else if (combineMode ==
REPLACE) {
7276 replaceGlobalValues (globalRowIndex, columnIndices, values);
7278 else if (combineMode ==
ABSMAX) {
7279 using ::Tpetra::Details::AbsMax;
7281 this->
template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
7285 else if (combineMode ==
INSERT) {
7286 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7287 (isStaticGraph() && combineMode ==
INSERT,
7288 std::invalid_argument,
"INSERT combine mode is forbidden "
7289 "if the matrix has a static (const) graph (i.e., was "
7290 "constructed with the CrsMatrix constructor that takes a "
7291 "const CrsGraph pointer).");
7294 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7295 (
true, std::logic_error,
"Invalid combine mode; should "
7297 "Please report this bug to the Tpetra developers.");
7301 if (combineMode ==
ADD || combineMode ==
INSERT) {
7308 insertGlobalValuesFilteredChecked(globalRowIndex,
7309 columnIndices, values, prefix, debug, verbose);
7320 else if (combineMode ==
ABSMAX) {
7321 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7322 ! isStaticGraph () && combineMode ==
ABSMAX, std::logic_error,
7323 "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
7326 else if (combineMode ==
REPLACE) {
7327 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7328 ! isStaticGraph () && combineMode ==
REPLACE, std::logic_error,
7329 "REPLACE combine mode when the matrix has a dynamic graph is not yet "
7333 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7334 true, std::logic_error,
"Should never get here! Please report this "
7335 "bug to the Tpetra developers.");
7340 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7344 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>&
importLIDs,
7345 Kokkos::DualView<char*, buffer_device_type> imports,
7355 ProfilingRegion
regionUAC (
"Tpetra::CrsMatrix::unpackAndCombine");
7357 const bool debug = Behavior::debug(
"CrsMatrix");
7358 const bool verbose = Behavior::verbose(
"CrsMatrix");
7363 {
"ADD",
"REPLACE",
"ABSMAX",
"INSERT",
"ZERO"};
7365 std::unique_ptr<std::string>
prefix;
7367 prefix = this->createPrefix(
"CrsMatrix",
"unpackAndCombine");
7368 std::ostringstream
os;
7371 << dualViewStatusToString (
importLIDs,
"importLIDs")
7374 << dualViewStatusToString (imports,
"imports")
7383 std::cerr <<
os.str ();
7389 std::ostringstream
os;
7390 os <<
"Invalid combine mode. Valid modes are {";
7399 (
true, std::invalid_argument,
os.str ());
7403 std::invalid_argument,
"importLIDs.extent(0)="
7405 <<
" != numPacketsPerLID.extent(0)="
7414 using Teuchos::reduceAll;
7415 std::unique_ptr<std::ostringstream>
msg (
new std::ostringstream ());
7421 }
catch (std::exception&
e) {
7426 const Teuchos::Comm<int>& comm = * (this->getComm ());
7434 std::ostringstream
os;
7435 os <<
"Proc " << comm.getRank () <<
": " <<
msg->str () <<
endl;
7436 msg = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
7439 (
true, std::logic_error, std::endl <<
"unpackAndCombineImpl "
7440 "threw an exception on one or more participating processes: "
7451 std::ostringstream
os;
7454 << dualViewStatusToString (
importLIDs,
"importLIDs")
7457 << dualViewStatusToString (imports,
"imports")
7462 std::cerr <<
os.str ();
7466 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7470 const Kokkos::DualView<
const local_ordinal_type*,
7472 Kokkos::DualView<char*, buffer_device_type> imports,
7479 "Tpetra::CrsMatrix::unpackAndCombineImpl",
7484 std::unique_ptr<std::string>
prefix;
7487 std::ostringstream
os;
7488 os << *
prefix <<
"isStaticGraph(): "
7489 << (isStaticGraph() ?
"true" :
"false")
7490 <<
", importLIDs.extent(0): "
7492 <<
", imports.extent(0): "
7493 << imports.extent(0)
7494 <<
", numPacketsPerLID.extent(0): "
7497 std::cerr <<
os.str();
7500 if (isStaticGraph ()) {
7501 using Details::unpackCrsMatrixAndCombineNew;
7502 unpackCrsMatrixAndCombineNew(*
this, imports, numPacketsPerLID,
7503 importLIDs, constantNumPackets,
7508 using padding_type =
typename crs_graph_type::padding_type;
7509 std::unique_ptr<padding_type> padding;
7511 padding = myGraph_->computePaddingForCrsMatrixUnpack(
7512 importLIDs, imports, numPacketsPerLID, verbose);
7514 catch (std::exception& e) {
7515 const auto rowMap = getRowMap();
7516 const auto comm =
rowMap.is_null() ? Teuchos::null :
7518 const int myRank = comm.is_null() ? -1 : comm->getRank();
7519 TEUCHOS_TEST_FOR_EXCEPTION
7520 (
true, std::runtime_error,
"Proc " << myRank <<
": "
7521 "Tpetra::CrsGraph::computePaddingForCrsMatrixUnpack "
7522 "threw an exception: " << e.what());
7525 std::ostringstream os;
7526 os << *prefix <<
"Call applyCrsPadding" << endl;
7527 std::cerr << os.str();
7529 applyCrsPadding(*padding, verbose);
7532 std::ostringstream os;
7533 os << *prefix <<
"Call unpackAndCombineImplNonStatic" << endl;
7534 std::cerr << os.str();
7536 unpackAndCombineImplNonStatic(importLIDs, imports,
7543 std::ostringstream os;
7544 os << *prefix <<
"Done" << endl;
7545 std::cerr << os.str();
7549 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7551 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7552 unpackAndCombineImplNonStatic(
7553 const Kokkos::DualView<
const local_ordinal_type*,
7554 buffer_device_type>& importLIDs,
7555 Kokkos::DualView<char*, buffer_device_type> imports,
7556 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7557 const size_t constantNumPackets,
7561 using Kokkos::subview;
7562 using Kokkos::MemoryUnmanaged;
7563 using Details::Behavior;
7566 using Details::PackTraits;
7567 using Details::ScalarViewTraits;
7569 using LO = LocalOrdinal;
7570 using GO = GlobalOrdinal;
7571 using ST = impl_scalar_type;
7572 using size_type =
typename Teuchos::ArrayView<LO>::size_type;
7574 typename View<int*, device_type>::HostMirror::execution_space;
7575 using pair_type = std::pair<typename View<int*, HES>::size_type,
7576 typename View<int*, HES>::size_type>;
7577 using gids_out_type = View<GO*, HES, MemoryUnmanaged>;
7578 using vals_out_type = View<ST*, HES, MemoryUnmanaged>;
7579 const char tfecfFuncName[] =
"unpackAndCombineImplNonStatic";
7581 const bool debug = Behavior::debug(
"CrsMatrix");
7582 const bool verbose = Behavior::verbose(
"CrsMatrix");
7583 std::unique_ptr<std::string> prefix;
7585 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
7586 std::ostringstream os;
7587 os << *prefix << endl;
7588 std::cerr << os.str ();
7590 const char*
const prefix_raw =
7591 verbose ? prefix.get()->c_str() :
nullptr;
7593 const size_type numImportLIDs = importLIDs.extent (0);
7594 if (combineMode ==
ZERO || numImportLIDs == 0) {
7598 Details::ProfilingRegion region_unpack_and_combine_impl_non_static(
7599 "Tpetra::CrsMatrix::unpackAndCombineImplNonStatic",
7604 if (imports.need_sync_host()) {
7605 imports.sync_host ();
7607 auto imports_h = imports.view_host();
7610 if (numPacketsPerLID.need_sync_host()) {
7611 numPacketsPerLID.sync_host ();
7613 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
7615 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
7616 auto importLIDs_h = importLIDs.view_host();
7618 size_t numBytesPerValue;
7629 numBytesPerValue = PackTraits<ST>::packValueCount (val);
7634 size_t maxRowNumEnt = 0;
7635 for (size_type i = 0; i < numImportLIDs; ++i) {
7636 const size_t numBytes = numPacketsPerLID_h[i];
7637 if (numBytes == 0) {
7642 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7643 (offset + numBytes >
size_t(imports_h.extent (0)),
7644 std::logic_error,
": At local row index importLIDs_h[i="
7645 << i <<
"]=" << importLIDs_h[i] <<
", offset (=" << offset
7646 <<
") + numBytes (=" << numBytes <<
") > "
7647 "imports_h.extent(0)=" << imports_h.extent (0) <<
".");
7652 const size_t theNumBytes =
7653 PackTraits<LO>::packValueCount (numEntLO);
7654 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7655 (theNumBytes > numBytes, std::logic_error,
": theNumBytes="
7656 << theNumBytes <<
" > numBytes = " << numBytes <<
".");
7658 const char*
const inBuf = imports_h.data () + offset;
7659 const size_t actualNumBytes =
7660 PackTraits<LO>::unpackValue (numEntLO, inBuf);
7663 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7664 (actualNumBytes > numBytes, std::logic_error,
": At i=" << i
7665 <<
", actualNumBytes=" << actualNumBytes
7666 <<
" > numBytes=" << numBytes <<
".");
7667 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7668 (numEntLO == 0, std::logic_error,
": At local row index "
7669 "importLIDs_h[i=" << i <<
"]=" << importLIDs_h[i] <<
", "
7670 "the number of entries read from the packed data is "
7671 "numEntLO=" << numEntLO <<
", but numBytes=" << numBytes
7675 maxRowNumEnt = std::max(
size_t(numEntLO), maxRowNumEnt);
7683 View<GO*, HES> gblColInds;
7684 View<LO*, HES> lclColInds;
7685 View<ST*, HES> vals;
7698 gblColInds = ScalarViewTraits<GO, HES>::allocateArray(
7699 gid, maxRowNumEnt,
"gids");
7700 lclColInds = ScalarViewTraits<LO, HES>::allocateArray(
7701 lid, maxRowNumEnt,
"lids");
7702 vals = ScalarViewTraits<ST, HES>::allocateArray(
7703 val, maxRowNumEnt,
"vals");
7707 for (size_type i = 0; i < numImportLIDs; ++i) {
7708 const size_t numBytes = numPacketsPerLID_h[i];
7709 if (numBytes == 0) {
7713 const char*
const inBuf = imports_h.data () + offset;
7714 (void) PackTraits<LO>::unpackValue (numEntLO, inBuf);
7716 const size_t numEnt =
static_cast<size_t>(numEntLO);;
7717 const LO lclRow = importLIDs_h[i];
7719 gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
7720 vals_out_type valsOut = subview (vals, pair_type (0, numEnt));
7722 const size_t numBytesOut =
7723 unpackRow (gidsOut.data (), valsOut.data (), imports_h.data (),
7724 offset, numBytes, numEnt, numBytesPerValue);
7725 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7726 (numBytes != numBytesOut, std::logic_error,
": At i=" << i
7727 <<
", numBytes=" << numBytes <<
" != numBytesOut="
7728 << numBytesOut <<
".");
7730 const ST*
const valsRaw =
const_cast<const ST*
> (valsOut.data ());
7731 const GO*
const gidsRaw =
const_cast<const GO*
> (gidsOut.data ());
7732 combineGlobalValuesRaw(lclRow, numEnt, valsRaw, gidsRaw,
7733 combineMode, prefix_raw, debug, verbose);
7739 std::ostringstream os;
7740 os << *prefix <<
"Done" << endl;
7741 std::cerr << os.str();
7745 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7746 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7749 const bool force)
const
7751 using Teuchos::null;
7756 ! this->hasColMap (), std::runtime_error,
"Tpetra::CrsMatrix::getColumn"
7757 "MapMultiVector: You may only call this method if the matrix has a "
7758 "column Map. If the matrix does not yet have a column Map, you should "
7759 "first call fillComplete (with domain and range Map if necessary).");
7764 ! this->getGraph ()->isFillComplete (), std::runtime_error,
"Tpetra::"
7765 "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
7766 "this matrix's graph is fill complete.");
7784 if (importMV_.is_null () || importMV_->getNumVectors () !=
numVecs) {
7802 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7803 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7806 const bool force)
const
7808 using Teuchos::null;
7815 ! this->getGraph ()->isFillComplete (), std::runtime_error,
"Tpetra::"
7816 "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
7817 "matrix's graph is fill complete.");
7837 if (exportMV_.is_null () || exportMV_->getNumVectors () !=
numVecs) {
7848 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7854 myGraph_.is_null (), std::logic_error,
"Tpetra::CrsMatrix::"
7855 "removeEmptyProcessesInPlace: This method does not work when the matrix "
7856 "was created with a constant graph (that is, when it was created using "
7857 "the version of its constructor that takes an RCP<const CrsGraph>). "
7858 "This is because the matrix is not allowed to modify the graph in that "
7859 "case, but removing empty processes requires modifying the graph.");
7860 myGraph_->removeEmptyProcessesInPlace (
newMap);
7864 this->map_ = this->getRowMap ();
7868 staticGraph_ = Teuchos::rcp_const_cast<const Graph> (myGraph_);
7871 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7872 Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7877 const Teuchos::RCP<const map_type>& domainMap,
7878 const Teuchos::RCP<const map_type>&
rangeMap,
7879 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const
7881 using Teuchos::Array;
7882 using Teuchos::ArrayView;
7883 using Teuchos::ParameterList;
7886 using Teuchos::rcp_implicit_cast;
7887 using Teuchos::sublist;
7891 using crs_matrix_type =
7893 const char errPfx[] =
"Tpetra::CrsMatrix::add: ";
7897 std::unique_ptr<std::string>
prefix;
7899 prefix = this->createPrefix(
"CrsMatrix",
"add");
7900 std::ostringstream
os;
7902 std::cerr <<
os.str ();
7905 const crs_matrix_type&
B = *
this;
7906 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero();
7907 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
7925 "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
7926 "then you must supply a nonnull domain Map to this method.");
7935 A_rangeMap.is_null (), std::invalid_argument,
7936 "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
7937 "then you must supply a nonnull range Map to this method.");
7952 std::invalid_argument,
7953 errPfx <<
"The input RowMatrix A must have a domain Map "
7954 "which is the same as (isSameAs) this RowMatrix's "
7958 errPfx <<
"The input RowMatrix A must have a range Map "
7959 "which is the same as (isSameAs) this RowMatrix's range "
7964 std::invalid_argument,
7965 errPfx <<
"The input domain Map must be the same as "
7966 "(isSameAs) this RowMatrix's domain Map.");
7970 std::invalid_argument,
7971 errPfx <<
"The input range Map must be the same as "
7972 "(isSameAs) this RowMatrix's range Map.");
7979 std::invalid_argument,
7980 errPfx <<
"The input domain Map must be the same as "
7981 "(isSameAs) this RowMatrix's domain Map.");
7984 std::invalid_argument,
7985 errPfx <<
"The input range Map must be the same as "
7986 "(isSameAs) this RowMatrix's range Map.");
7991 std::invalid_argument,
errPfx <<
"If neither A nor B "
7992 "have a domain and range Map, then you must supply a "
7993 "nonnull domain and range Map to this method.");
8003 if (!
params.is_null()) {
8026 for (LO localRow = 0; localRow <
localNumRows; ++localRow) {
8027 const size_t A_numEntries =
A.getNumEntriesInLocalRow (localRow);
8033 for (LO localRow = 0; localRow <
localNumRows; ++localRow) {
8034 const size_t B_numEntries =
B.getNumEntriesInLocalRow (localRow);
8056 (
true, std::invalid_argument,
errPfx <<
"The row maps must "
8057 "be the same for statically allocated matrices, to ensure "
8058 "that there is sufficient space to do the addition.");
8062 (
C.is_null (), std::logic_error,
8063 errPfx <<
"C should not be null at this point. "
8064 "Please report this bug to the Tpetra developers.");
8067 std::ostringstream
os;
8068 os << *
prefix <<
"Compute C = alpha*A + beta*B" <<
endl;
8069 std::cerr <<
os.str ();
8071 using gids_type = nonconst_global_inds_host_view_type;
8072 using vals_type = nonconst_values_host_view_type;
8126 std::ostringstream
os;
8128 std::cerr <<
os.str ();
8137 std::ostringstream
os;
8138 os << *
prefix <<
"Do NOT call fillComplete on C" <<
endl;
8139 std::cerr <<
os.str ();
8143 std::ostringstream
os;
8145 std::cerr <<
os.str ();
8152 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
8156 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>&
rowTransfer,
8157 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > &
domainTransfer,
8158 const Teuchos::RCP<const map_type>& domainMap,
8159 const Teuchos::RCP<const map_type>&
rangeMap,
8160 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const
8167 using Teuchos::ArrayRCP;
8168 using Teuchos::ArrayView;
8169 using Teuchos::Comm;
8170 using Teuchos::ParameterList;
8175 typedef node_type
NT;
8180 const bool debug = Behavior::debug(
"CrsMatrix");
8181 const bool verbose = Behavior::verbose(
"CrsMatrix");
8182 int MyPID = getComm ()->getRank ();
8187 this->createPrefix(
"CrsMatrix",
"transferAndFillComplete");
8188 std::ostringstream
os;
8190 std::cerr <<
os.str();
8197 bool reverseMode =
false;
8198 bool restrictComm =
false;
8200 int mm_optimization_core_count =
8201 Behavior::TAFC_OptimizationCoreCount();
8202 RCP<ParameterList> matrixparams;
8203 bool overrideAllreduce =
false;
8204 if (! params.is_null ()) {
8205 matrixparams = sublist (params,
"CrsMatrix");
8206 reverseMode = params->get (
"Reverse Mode", reverseMode);
8207 restrictComm = params->get (
"Restrict Communicator", restrictComm);
8208 auto & slist = params->sublist(
"matrixmatrix: kernel params",
false);
8209 isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
8210 mm_optimization_core_count = slist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
8212 overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
8213 if(getComm()->getSize() < mm_optimization_core_count && isMM) isMM =
false;
8214 if(reverseMode) isMM =
false;
8218 std::shared_ptr< ::Tpetra::Details::CommRequest> iallreduceRequest;
8220 int reduced_mismatch = 0;
8221 if (isMM && !overrideAllreduce) {
8224 const bool source_vals = ! getGraph ()->getImporter ().is_null();
8225 const bool target_vals = ! (rowTransfer.getExportLIDs ().size() == 0 ||
8226 rowTransfer.getRemoteLIDs ().size() == 0);
8227 mismatch = (source_vals != target_vals) ? 1 : 0;
8229 ::Tpetra::Details::iallreduce (mismatch, reduced_mismatch,
8230 Teuchos::REDUCE_MAX, * (getComm ()));
8233#ifdef HAVE_TPETRA_MMM_TIMINGS
8234 using Teuchos::TimeMonitor;
8236 if(!params.is_null())
8237 label = params->get(
"Timer Label",label);
8238 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
8241 std::ostringstream os;
8242 if(isMM) os<<
":MMOpt";
8243 else os<<
":MMLegacy";
8247 Teuchos::TimeMonitor MMall(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC All") +tlstr ));
8255 const import_type* xferAsImport =
dynamic_cast<const import_type*
> (&rowTransfer);
8256 const export_type* xferAsExport =
dynamic_cast<const export_type*
> (&rowTransfer);
8257 TEUCHOS_TEST_FOR_EXCEPTION(
8258 xferAsImport ==
nullptr && xferAsExport ==
nullptr, std::invalid_argument,
8259 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
8260 "argument must be either an Import or an Export, and its template "
8261 "parameters must match the corresponding template parameters of the "
8269 Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<const import_type> (domainTransfer);
8270 Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<const export_type> (domainTransfer);
8272 if(! domainTransfer.is_null()) {
8273 TEUCHOS_TEST_FOR_EXCEPTION(
8274 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
8275 "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
8276 "argument must be either an Import or an Export, and its template "
8277 "parameters must match the corresponding template parameters of the "
8280 TEUCHOS_TEST_FOR_EXCEPTION(
8281 ( xferAsImport !=
nullptr || ! xferDomainAsImport.is_null() ) &&
8282 (( xferAsImport !=
nullptr && xferDomainAsImport.is_null() ) ||
8283 ( xferAsImport ==
nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
8284 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
8285 "arguments must be of the same type (either Import or Export).");
8287 TEUCHOS_TEST_FOR_EXCEPTION(
8288 ( xferAsExport !=
nullptr || ! xferDomainAsExport.is_null() ) &&
8289 (( xferAsExport !=
nullptr && xferDomainAsExport.is_null() ) ||
8290 ( xferAsExport ==
nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
8291 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
8292 "arguments must be of the same type (either Import or Export).");
8298 const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
8302 RCP<const map_type> MyRowMap = reverseMode ?
8303 rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
8304 RCP<const map_type> MyColMap;
8305 RCP<const map_type> MyDomainMap = !
domainMap.is_null () ?
8307 RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
8308 rangeMap : getRangeMap ();
8309 RCP<const map_type> BaseRowMap = MyRowMap;
8310 RCP<const map_type> BaseDomainMap = MyDomainMap;
8318 if (! destMat.is_null ()) {
8329 const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
8330 ! destMat->getGraph ()->isGloballyIndexed ();
8331 TEUCHOS_TEST_FOR_EXCEPTION(
8332 ! NewFlag, std::invalid_argument,
"Tpetra::CrsMatrix::"
8333 "transferAndFillComplete: The input argument 'destMat' is only allowed "
8334 "to be nonnull, if its graph is empty (neither locally nor globally "
8343 TEUCHOS_TEST_FOR_EXCEPTION(
8344 ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
8345 "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
8346 "input argument 'destMat' is not the same as the (row) Map specified "
8347 "by the input argument 'rowTransfer'.");
8348 TEUCHOS_TEST_FOR_EXCEPTION(
8349 ! destMat->checkSizes (*
this), std::invalid_argument,
8350 "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
8351 "destination matrix, but checkSizes() indicates that it is not a legal "
8352 "legal target for redistribution from the source matrix (*this). This "
8353 "may mean that they do not have the same dimensions.");
8367 TEUCHOS_TEST_FOR_EXCEPTION(
8368 ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
8369 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
8370 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
8371 TEUCHOS_TEST_FOR_EXCEPTION(
8372 ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
8373 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
8374 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
8377 TEUCHOS_TEST_FOR_EXCEPTION(
8378 ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*
domainMap),
8379 std::invalid_argument,
8380 "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
8381 "argument must be the same as the rebalanced domain map 'domainMap'");
8383 TEUCHOS_TEST_FOR_EXCEPTION(
8384 ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*
domainMap),
8385 std::invalid_argument,
8386 "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
8387 "argument must be the same as the rebalanced domain map 'domainMap'");
8400 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
8401 ArrayView<const LO> ExportLIDs = reverseMode ?
8402 rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
8403 ArrayView<const LO> RemoteLIDs = reverseMode ?
8404 rowTransfer.getExportLIDs () : rowTransfer.getRemoteLIDs ();
8405 ArrayView<const LO> PermuteToLIDs = reverseMode ?
8406 rowTransfer.getPermuteFromLIDs () : rowTransfer.getPermuteToLIDs ();
8407 ArrayView<const LO> PermuteFromLIDs = reverseMode ?
8408 rowTransfer.getPermuteToLIDs () : rowTransfer.getPermuteFromLIDs ();
8409 Distributor& Distor = rowTransfer.getDistributor ();
8412 Teuchos::Array<int> SourcePids;
8413 Teuchos::Array<int> TargetPids;
8416 RCP<const map_type> ReducedRowMap, ReducedColMap,
8417 ReducedDomainMap, ReducedRangeMap;
8418 RCP<const Comm<int> > ReducedComm;
8422 if (destMat.is_null ()) {
8423 destMat = rcp (
new this_type (MyRowMap, 0, StaticProfile, matrixparams));
8430 ReducedRowMap = MyRowMap->removeEmptyProcesses ();
8431 ReducedComm = ReducedRowMap.is_null () ?
8433 ReducedRowMap->getComm ();
8434 destMat->removeEmptyProcessesInPlace (ReducedRowMap);
8436 ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
8438 MyDomainMap->replaceCommWithSubset (ReducedComm);
8439 ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
8441 MyRangeMap->replaceCommWithSubset (ReducedComm);
8444 MyRowMap = ReducedRowMap;
8445 MyDomainMap = ReducedDomainMap;
8446 MyRangeMap = ReducedRangeMap;
8449 if (! ReducedComm.is_null ()) {
8450 MyPID = ReducedComm->getRank ();
8457 ReducedComm = MyRowMap->getComm ();
8466 RCP<const import_type> MyImporter = getGraph ()->getImporter ();
8469 bool bSameDomainMap = BaseDomainMap->isSameAs (*getDomainMap ());
8471 if (! restrictComm && ! MyImporter.is_null () && bSameDomainMap ) {
8478 Import_Util::getPids (*MyImporter, SourcePids,
false);
8480 else if (restrictComm && ! MyImporter.is_null () && bSameDomainMap) {
8483 IntVectorType SourceDomain_pids(getDomainMap (),
true);
8484 IntVectorType SourceCol_pids(getColMap());
8486 SourceDomain_pids.putScalar(MyPID);
8488 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8489 SourcePids.resize (getColMap ()->getNodeNumElements ());
8490 SourceCol_pids.get1dCopy (SourcePids ());
8492 else if (MyImporter.is_null ()) {
8494 SourcePids.resize (getColMap ()->getNodeNumElements ());
8495 SourcePids.assign (getColMap ()->getNodeNumElements (), MyPID);
8497 else if ( ! MyImporter.is_null () &&
8498 ! domainTransfer.is_null () ) {
8505 IntVectorType TargetDomain_pids (
domainMap);
8506 TargetDomain_pids.putScalar (MyPID);
8509 IntVectorType SourceDomain_pids (getDomainMap ());
8512 IntVectorType SourceCol_pids (getColMap ());
8514 if (! reverseMode && ! xferDomainAsImport.is_null() ) {
8515 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8517 else if (reverseMode && ! xferDomainAsExport.is_null() ) {
8518 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8520 else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
8521 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8523 else if (reverseMode && ! xferDomainAsImport.is_null() ) {
8524 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8527 TEUCHOS_TEST_FOR_EXCEPTION(
8528 true, std::logic_error,
"Tpetra::CrsMatrix::"
8529 "transferAndFillComplete: Should never get here! "
8530 "Please report this bug to a Tpetra developer.");
8532 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8533 SourcePids.resize (getColMap ()->getNodeNumElements ());
8534 SourceCol_pids.get1dCopy (SourcePids ());
8536 else if ( ! MyImporter.is_null () &&
8537 BaseDomainMap->isSameAs (*BaseRowMap) &&
8538 getDomainMap ()->isSameAs (*getRowMap ())) {
8541 IntVectorType TargetRow_pids (
domainMap);
8542 IntVectorType SourceRow_pids (getRowMap ());
8543 IntVectorType SourceCol_pids (getColMap ());
8545 TargetRow_pids.putScalar (MyPID);
8546 if (! reverseMode && xferAsImport !=
nullptr) {
8547 SourceRow_pids.doExport (TargetRow_pids, *xferAsImport,
INSERT);
8549 else if (reverseMode && xferAsExport !=
nullptr) {
8550 SourceRow_pids.doExport (TargetRow_pids, *xferAsExport,
INSERT);
8552 else if (! reverseMode && xferAsExport !=
nullptr) {
8553 SourceRow_pids.doImport (TargetRow_pids, *xferAsExport,
INSERT);
8555 else if (reverseMode && xferAsImport !=
nullptr) {
8556 SourceRow_pids.doImport (TargetRow_pids, *xferAsImport,
INSERT);
8559 TEUCHOS_TEST_FOR_EXCEPTION(
8560 true, std::logic_error,
"Tpetra::CrsMatrix::"
8561 "transferAndFillComplete: Should never get here! "
8562 "Please report this bug to a Tpetra developer.");
8565 SourceCol_pids.doImport (SourceRow_pids, *MyImporter,
INSERT);
8566 SourcePids.resize (getColMap ()->getNodeNumElements ());
8567 SourceCol_pids.get1dCopy (SourcePids ());
8570 TEUCHOS_TEST_FOR_EXCEPTION(
8571 true, std::invalid_argument,
"Tpetra::CrsMatrix::"
8572 "transferAndFillComplete: This method only allows either domainMap == "
8573 "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
8574 "getDomainMap () == getRowMap ()).");
8578 size_t constantNumPackets = destMat->constantNumberOfPackets ();
8579 if (constantNumPackets == 0) {
8580 destMat->reallocArraysForNumPacketsPerLid (ExportLIDs.size (),
8581 RemoteLIDs.size ());
8588 const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
8589 destMat->reallocImportsIfNeeded (rbufLen,
false,
nullptr);
8594 using Teuchos::outArg;
8595 using Teuchos::REDUCE_MAX;
8596 using Teuchos::reduceAll;
8599 RCP<const Teuchos::Comm<int> > comm = this->getComm ();
8600 const int myRank = comm->getRank ();
8602 std::ostringstream errStrm;
8606 Teuchos::ArrayView<size_t> numExportPacketsPerLID;
8609 destMat->numExportPacketsPerLID_.modify_host ();
8610 numExportPacketsPerLID =
8613 catch (std::exception& e) {
8614 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw: "
8615 << e.what () << std::endl;
8619 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw "
8620 "an exception not a subclass of std::exception" << std::endl;
8624 if (! comm.is_null ()) {
8625 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8629 TEUCHOS_TEST_FOR_EXCEPTION(
8630 true, std::runtime_error,
"getArrayViewFromDualView threw an "
8631 "exception on at least one process.");
8635 std::ostringstream os;
8636 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8638 std::cerr << os.str ();
8643 numExportPacketsPerLID,
8646 constantNumPackets);
8648 catch (std::exception& e) {
8649 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw: "
8650 << e.what () << std::endl;
8654 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw "
8655 "an exception not a subclass of std::exception" << std::endl;
8660 std::ostringstream os;
8661 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8663 std::cerr << os.str ();
8666 if (! comm.is_null ()) {
8667 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8671 TEUCHOS_TEST_FOR_EXCEPTION(
8672 true, std::runtime_error,
"packCrsMatrixWithOwningPIDs threw an "
8673 "exception on at least one process.");
8678 destMat->numExportPacketsPerLID_.modify_host ();
8679 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
8682 std::ostringstream os;
8683 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8685 std::cerr << os.str ();
8689 numExportPacketsPerLID,
8692 constantNumPackets);
8694 std::ostringstream os;
8695 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8697 std::cerr << os.str ();
8702 if (! communication_needed) {
8704 std::ostringstream os;
8705 os << *verbosePrefix <<
"Communication not needed" << std::endl;
8706 std::cerr << os.str ();
8711 if (constantNumPackets == 0) {
8713 std::ostringstream os;
8714 os << *verbosePrefix <<
"Reverse mode, variable # packets / LID"
8716 std::cerr << os.str ();
8721 destMat->numExportPacketsPerLID_.sync_host ();
8722 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8724 destMat->numImportPacketsPerLID_.sync_host ();
8725 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8729 std::ostringstream os;
8730 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8732 std::cerr << os.str ();
8734 Distor.doReversePostsAndWaits (numExportPacketsPerLID, 1,
8735 numImportPacketsPerLID);
8737 std::ostringstream os;
8738 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8740 std::cerr << os.str ();
8743 size_t totalImportPackets = 0;
8744 for (
Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8745 totalImportPackets += numImportPacketsPerLID[i];
8750 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8751 verbosePrefix.get ());
8752 destMat->imports_.modify_host ();
8753 Teuchos::ArrayView<char> hostImports =
8757 destMat->exports_.sync_host ();
8758 Teuchos::ArrayView<const char> hostExports =
8761 std::ostringstream os;
8762 os << *verbosePrefix <<
"Calling 4-arg doReversePostsAndWaits"
8764 std::cerr << os.str ();
8766 Distor.doReversePostsAndWaits (hostExports,
8767 numExportPacketsPerLID,
8769 numImportPacketsPerLID);
8771 std::ostringstream os;
8772 os << *verbosePrefix <<
"Finished 4-arg doReversePostsAndWaits"
8774 std::cerr << os.str ();
8779 std::ostringstream os;
8780 os << *verbosePrefix <<
"Reverse mode, constant # packets / LID"
8782 std::cerr << os.str ();
8784 destMat->imports_.modify_host ();
8785 Teuchos::ArrayView<char> hostImports =
8789 destMat->exports_.sync_host ();
8790 Teuchos::ArrayView<const char> hostExports =
8793 std::ostringstream os;
8794 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8796 std::cerr << os.str ();
8798 Distor.doReversePostsAndWaits (hostExports,
8802 std::ostringstream os;
8803 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8805 std::cerr << os.str ();
8810 if (constantNumPackets == 0) {
8812 std::ostringstream os;
8813 os << *verbosePrefix <<
"Forward mode, variable # packets / LID"
8815 std::cerr << os.str ();
8820 destMat->numExportPacketsPerLID_.sync_host ();
8821 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8823 destMat->numImportPacketsPerLID_.sync_host ();
8824 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8827 std::ostringstream os;
8828 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8830 std::cerr << os.str ();
8832 Distor.doPostsAndWaits (numExportPacketsPerLID, 1,
8833 numImportPacketsPerLID);
8835 std::ostringstream os;
8836 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8838 std::cerr << os.str ();
8841 size_t totalImportPackets = 0;
8842 for (
Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8843 totalImportPackets += numImportPacketsPerLID[i];
8848 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8849 verbosePrefix.get ());
8850 destMat->imports_.modify_host ();
8851 Teuchos::ArrayView<char> hostImports =
8855 destMat->exports_.sync_host ();
8856 Teuchos::ArrayView<const char> hostExports =
8859 std::ostringstream os;
8860 os << *verbosePrefix <<
"Calling 4-arg doPostsAndWaits"
8862 std::cerr << os.str ();
8864 Distor.doPostsAndWaits (hostExports,
8865 numExportPacketsPerLID,
8867 numImportPacketsPerLID);
8869 std::ostringstream os;
8870 os << *verbosePrefix <<
"Finished 4-arg doPostsAndWaits"
8872 std::cerr << os.str ();
8877 std::ostringstream os;
8878 os << *verbosePrefix <<
"Forward mode, constant # packets / LID"
8880 std::cerr << os.str ();
8882 destMat->imports_.modify_host ();
8883 Teuchos::ArrayView<char> hostImports =
8887 destMat->exports_.sync_host ();
8888 Teuchos::ArrayView<const char> hostExports =
8891 std::ostringstream os;
8892 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8894 std::cerr << os.str ();
8896 Distor.doPostsAndWaits (hostExports,
8900 std::ostringstream os;
8901 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8903 std::cerr << os.str ();
8914 destMat->numImportPacketsPerLID_.sync_host ();
8915 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
8917 destMat->imports_.sync_host ();
8918 Teuchos::ArrayView<const char> hostImports =
8922 std::ostringstream os;
8923 os << *verbosePrefix <<
"Calling unpackAndCombineWithOwningPIDsCount"
8925 std::cerr << os.str ();
8931 numImportPacketsPerLID,
8938 std::ostringstream os;
8939 os << *verbosePrefix <<
"unpackAndCombineWithOwningPIDsCount returned "
8940 << mynnz << std::endl;
8941 std::cerr << os.str ();
8943 size_t N = BaseRowMap->getNodeNumElements ();
8946 ArrayRCP<size_t> CSR_rowptr(N+1);
8947 ArrayRCP<GO> CSR_colind_GID;
8948 ArrayRCP<LO> CSR_colind_LID;
8949 ArrayRCP<Scalar> CSR_vals;
8950 CSR_colind_GID.resize (mynnz);
8951 CSR_vals.resize (mynnz);
8955 if (
typeid (LO) ==
typeid (GO)) {
8956 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
8959 CSR_colind_LID.resize (mynnz);
8963 std::ostringstream os;
8964 os << *verbosePrefix <<
"Calling unpackAndCombineIntoCrsArrays"
8966 std::cerr << os.str ();
8976 numImportPacketsPerLID,
8987 Teuchos::av_reinterpret_cast<impl_scalar_type> (CSR_vals ()),
8997 Teuchos::Array<int> RemotePids;
8999 std::ostringstream os;
9000 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
9002 std::cerr << os.str ();
9004 Import_Util::lowCommunicationMakeColMapAndReindex (CSR_rowptr (),
9013 std::ostringstream os;
9014 os << *verbosePrefix <<
"restrictComm="
9015 << (restrictComm ?
"true" :
"false") << std::endl;
9016 std::cerr << os.str ();
9023 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
9025 MyColMap->replaceCommWithSubset (ReducedComm);
9026 MyColMap = ReducedColMap;
9031 std::ostringstream os;
9032 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
9033 std::cerr << os.str ();
9035 destMat->replaceColMap (MyColMap);
9042 if (ReducedComm.is_null ()) {
9044 std::ostringstream os;
9045 os << *verbosePrefix <<
"I am no longer in the communicator; "
9046 "returning" << std::endl;
9047 std::cerr << os.str ();
9055 if ((! reverseMode && xferAsImport !=
nullptr) ||
9056 (reverseMode && xferAsExport !=
nullptr)) {
9058 std::ostringstream os;
9059 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
9060 std::cerr << os.str ();
9062 Import_Util::sortCrsEntries (CSR_rowptr (),
9066 else if ((! reverseMode && xferAsExport !=
nullptr) ||
9067 (reverseMode && xferAsImport !=
nullptr)) {
9069 std::ostringstream os;
9070 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
9072 std::cerr << os.str();
9074 Import_Util::sortAndMergeCrsEntries (CSR_rowptr (),
9077 if (CSR_rowptr[N] != mynnz) {
9078 CSR_colind_LID.resize (CSR_rowptr[N]);
9079 CSR_vals.resize (CSR_rowptr[N]);
9083 TEUCHOS_TEST_FOR_EXCEPTION(
9084 true, std::logic_error,
"Tpetra::CrsMatrix::"
9085 "transferAndFillComplete: Should never get here! "
9086 "Please report this bug to a Tpetra developer.");
9093 std::ostringstream os;
9094 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
9095 std::cerr << os.str ();
9103 destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
9109 Teuchos::ParameterList esfc_params;
9111 RCP<import_type> MyImport;
9114 if (iallreduceRequest.get () !=
nullptr) {
9116 std::ostringstream os;
9117 os << *verbosePrefix <<
"Calling iallreduceRequest->wait()"
9119 std::cerr << os.str ();
9121 iallreduceRequest->wait ();
9122 if (reduced_mismatch != 0) {
9128#ifdef HAVE_TPETRA_MMM_TIMINGS
9129 Teuchos::TimeMonitor MMisMM (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM Block")));
9134 std::ostringstream os;
9135 os << *verbosePrefix <<
"Calling getAllValues" << endl;
9136 std::cerr << os.str ();
9139 Teuchos::ArrayRCP<LocalOrdinal> type3LIDs;
9140 Teuchos::ArrayRCP<int> type3PIDs;
9141 Teuchos::ArrayRCP<const size_t> rowptr;
9142 Teuchos::ArrayRCP<const LO> colind;
9143 Teuchos::ArrayRCP<const Scalar> vals;
9145#ifdef HAVE_TPETRA_MMM_TIMINGS
9146 TimeMonitor tm_getAllValues (*TimeMonitor::getNewTimer(prefix + std::string(
"isMMgetAllValues")));
9148 getAllValues(rowptr,colind,vals);
9152 std::ostringstream os;
9153 os << *verbosePrefix <<
"Calling reverseNeighborDiscovery" << std::endl;
9154 std::cerr << os.str ();
9158#ifdef HAVE_TPETRA_MMM_TIMINGS
9159 TimeMonitor tm_rnd (*TimeMonitor::getNewTimer(prefix + std::string(
"isMMrevNeighDis")));
9161 Import_Util::reverseNeighborDiscovery(*
this,
9173 std::ostringstream os;
9174 os << *verbosePrefix <<
"Done with reverseNeighborDiscovery" << std::endl;
9175 std::cerr << os.str ();
9178 Teuchos::ArrayView<const int> EPID1 = MyImporter.is_null() ? Teuchos::ArrayView<const int>() : MyImporter->getExportPIDs();
9179 Teuchos::ArrayView<const LO> ELID1 = MyImporter.is_null() ? Teuchos::ArrayView<const LO>() : MyImporter->getExportLIDs();
9181 Teuchos::ArrayView<const int> TEPID2 = rowTransfer.getExportPIDs();
9182 Teuchos::ArrayView<const LO> TELID2 = rowTransfer.getExportLIDs();
9184 const int numCols = getGraph()->getColMap()->getNodeNumElements();
9186 std::vector<bool> IsOwned(numCols,
true);
9187 std::vector<int> SentTo(numCols,-1);
9188 if (! MyImporter.is_null ()) {
9189 for (
auto && rlid : MyImporter->getRemoteLIDs()) {
9190 IsOwned[rlid]=
false;
9194 std::vector<std::pair<int,GO> > usrtg;
9195 usrtg.reserve(TEPID2.size());
9198 const auto&
colMap = * (this->getColMap ());
9200 const LO row = TELID2[i];
9201 const int pid = TEPID2[i];
9202 for (
auto j = rowptr[row]; j < rowptr[row+1]; ++j) {
9203 const int col = colind[j];
9204 if (IsOwned[col] && SentTo[col] != pid) {
9206 GO gid =
colMap.getGlobalElement (col);
9207 usrtg.push_back (std::pair<int,GO> (pid, gid));
9214 std::sort(usrtg.begin(),usrtg.end());
9215 auto eopg = std ::unique(usrtg.begin(),usrtg.end());
9217 usrtg.erase(eopg,usrtg.end());
9220 Teuchos::ArrayRCP<int> EPID2=Teuchos::arcp(
new int[type2_us_size],0,type2_us_size,
true);
9221 Teuchos::ArrayRCP< LO> ELID2=Teuchos::arcp(
new LO[type2_us_size],0,type2_us_size,
true);
9224 for(
auto && p : usrtg) {
9225 EPID2[pos]= p.first;
9226 ELID2[pos]= this->getDomainMap()->getLocalElement(p.second);
9230 Teuchos::ArrayView<int> EPID3 = type3PIDs();
9231 Teuchos::ArrayView< LO> ELID3 = type3LIDs();
9232 GO InfGID = std::numeric_limits<GO>::max();
9233 int InfPID = INT_MAX;
9237#define TPETRA_MIN3(x,y,z) ((x)<(y)?(std::min(x,z)):(std::min(y,z)))
9238 int i1=0, i2=0, i3=0;
9239 int Len1 = EPID1.size();
9240 int Len2 = EPID2.size();
9241 int Len3 = EPID3.size();
9243 int MyLen=Len1+Len2+Len3;
9244 Teuchos::ArrayRCP<LO> userExportLIDs = Teuchos::arcp(
new LO[MyLen],0,MyLen,
true);
9245 Teuchos::ArrayRCP<int> userExportPIDs = Teuchos::arcp(
new int[MyLen],0,MyLen,
true);
9248 while(i1 < Len1 || i2 < Len2 || i3 < Len3){
9249 int PID1 = (i1<Len1)?(EPID1[i1]):InfPID;
9250 int PID2 = (i2<Len2)?(EPID2[i2]):InfPID;
9251 int PID3 = (i3<Len3)?(EPID3[i3]):InfPID;
9253 GO GID1 = (i1<Len1)?getDomainMap()->getGlobalElement(ELID1[i1]):InfGID;
9254 GO GID2 = (i2<Len2)?getDomainMap()->getGlobalElement(ELID2[i2]):InfGID;
9255 GO GID3 = (i3<Len3)?getDomainMap()->getGlobalElement(ELID3[i3]):InfGID;
9257 int MIN_PID = TPETRA_MIN3(PID1,PID2,PID3);
9258 GO MIN_GID = TPETRA_MIN3( ((PID1==MIN_PID)?GID1:InfGID), ((PID2==MIN_PID)?GID2:InfGID), ((PID3==MIN_PID)?GID3:InfGID));
9262 bool added_entry=
false;
9264 if(PID1 == MIN_PID && GID1 == MIN_GID){
9265 userExportLIDs[iloc]=ELID1[i1];
9266 userExportPIDs[iloc]=EPID1[i1];
9271 if(PID2 == MIN_PID && GID2 == MIN_GID){
9273 userExportLIDs[iloc]=ELID2[i2];
9274 userExportPIDs[iloc]=EPID2[i2];
9280 if(PID3 == MIN_PID && GID3 == MIN_GID){
9282 userExportLIDs[iloc]=ELID3[i3];
9283 userExportPIDs[iloc]=EPID3[i3];
9291 std::ostringstream os;
9292 os << *verbosePrefix <<
"Create Import" << std::endl;
9293 std::cerr << os.str ();
9296#ifdef HAVE_TPETRA_MMM_TIMINGS
9297 auto ismmIctor(*TimeMonitor::getNewTimer(prefix + std::string(
"isMMIportCtor")));
9299 Teuchos::RCP<Teuchos::ParameterList> plist = rcp(
new Teuchos::ParameterList());
9301 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9302 MyImport = rcp (
new import_type (MyDomainMap,
9305 userExportLIDs.view(0,iloc).getConst(),
9306 userExportPIDs.view(0,iloc).getConst(),
9311 std::ostringstream os;
9312 os << *verbosePrefix <<
"Call expertStaticFillComplete" << std::endl;
9313 std::cerr << os.str ();
9317#ifdef HAVE_TPETRA_MMM_TIMINGS
9318 TimeMonitor esfc (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM::destMat->eSFC")));
9319 esfc_params.set(
"Timer Label",label+std::string(
"isMM eSFC"));
9321 if(!params.is_null())
9322 esfc_params.set(
"compute global constants",params->get(
"compute global constants",
true));
9323 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport,Teuchos::null,rcp(
new Teuchos::ParameterList(esfc_params)));
9329#ifdef HAVE_TPETRA_MMM_TIMINGS
9330 TimeMonitor MMnotMMblock (*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMblock")));
9333 std::ostringstream os;
9334 os << *verbosePrefix <<
"Create Import" << std::endl;
9335 std::cerr << os.str ();
9338#ifdef HAVE_TPETRA_MMM_TIMINGS
9339 TimeMonitor notMMIcTor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMCreateImporter")));
9341 Teuchos::RCP<Teuchos::ParameterList> mypars = rcp(
new Teuchos::ParameterList);
9342 mypars->set(
"Timer Label",
"notMMFrom_tAFC");
9343 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9344 MyImport = rcp (
new import_type (MyDomainMap, MyColMap, RemotePids, mypars));
9347 std::ostringstream os;
9348 os << *verbosePrefix <<
"Call expertStaticFillComplete" << endl;
9349 std::cerr << os.str ();
9352#ifdef HAVE_TPETRA_MMM_TIMINGS
9353 TimeMonitor esfcnotmm(*TimeMonitor::getNewTimer(prefix + std::string(
"notMMdestMat->expertStaticFillComplete")));
9354 esfc_params.set(
"Timer Label",prefix+std::string(
"notMM eSFC"));
9356 esfc_params.set(
"Timer Label",std::string(
"notMM eSFC"));
9359 if (!params.is_null ()) {
9360 esfc_params.set (
"compute global constants",
9361 params->get (
"compute global constants",
true));
9363 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap,
9364 MyImport, Teuchos::null,
9365 rcp (
new Teuchos::ParameterList (esfc_params)));
9369 std::ostringstream os;
9370 os << *verbosePrefix <<
"Done" << endl;
9371 std::cerr << os.str ();
9376 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9381 const Teuchos::RCP<const map_type>& domainMap,
9382 const Teuchos::RCP<const map_type>&
rangeMap,
9383 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const
9388 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9394 const Teuchos::RCP<const map_type>& domainMap,
9395 const Teuchos::RCP<const map_type>&
rangeMap,
9396 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const
9401 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9406 const Teuchos::RCP<const map_type>& domainMap,
9407 const Teuchos::RCP<const map_type>&
rangeMap,
9408 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const
9413 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9419 const Teuchos::RCP<const map_type>& domainMap,
9420 const Teuchos::RCP<const map_type>&
rangeMap,
9421 const Teuchos::RCP<Teuchos::ParameterList>&
params)
const
9435#define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
9437 template class CrsMatrix< SCALAR , LO , GO , NODE >; \
9438 template Teuchos::RCP< CrsMatrix< SCALAR , LO , GO , NODE > > \
9439 CrsMatrix< SCALAR , LO , GO , NODE >::convert< SCALAR > () const;
9441#define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
9443 template Teuchos::RCP< CrsMatrix< SO , LO , GO , NODE > > \
9444 CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
9446#define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9448 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9449 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9450 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9451 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9452 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
9453 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9454 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9455 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9456 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9457 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9458 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9459 const Teuchos::RCP<Teuchos::ParameterList>& params);
9461#define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9463 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9464 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9465 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9466 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9467 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
9468 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9469 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9470 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
9471 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9472 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9473 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9474 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9475 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9476 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9477 const Teuchos::RCP<Teuchos::ParameterList>& params);
9480#define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9482 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9483 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9484 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9485 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9486 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
9487 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9488 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9489 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9490 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9491 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9492 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9493 const Teuchos::RCP<Teuchos::ParameterList>& params);
9495#define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9497 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9498 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9499 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9500 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9501 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
9502 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9503 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9504 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
9505 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9506 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9507 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9508 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9509 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9510 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9511 const Teuchos::RCP<Teuchos::ParameterList>& params);
9514#define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
9515 TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
9516 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9517 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9518 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9519 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)