| //===- VectorAnalysis.cpp - Analysis for Vectorization --------------------===// |
| // |
| // Copyright 2019 The MLIR Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // ============================================================================= |
| |
| #include "mlir/Analysis/VectorAnalysis.h" |
| #include "mlir/AffineOps/AffineOps.h" |
| #include "mlir/Analysis/AffineAnalysis.h" |
| #include "mlir/Analysis/LoopAnalysis.h" |
| #include "mlir/IR/IntegerSet.h" |
| #include "mlir/IR/Operation.h" |
| #include "mlir/StandardOps/Ops.h" |
| #include "mlir/Support/Functional.h" |
| #include "mlir/Support/STLExtras.h" |
| #include "mlir/VectorOps/VectorOps.h" |
| |
| #include "llvm/ADT/DenseSet.h" |
| #include "llvm/ADT/SetVector.h" |
| |
| /// |
| /// Implements Analysis functions specific to vectors which support |
| /// the vectorization and vectorization materialization passes. |
| /// |
| |
| using namespace mlir; |
| |
| using llvm::SetVector; |
| |
| Optional<SmallVector<unsigned, 4>> |
| mlir::shapeRatio(ArrayRef<int64_t> superShape, ArrayRef<int64_t> subShape) { |
| if (superShape.size() < subShape.size()) { |
| return Optional<SmallVector<unsigned, 4>>(); |
| } |
| |
| // Starting from the end, compute the integer divisors. |
| // Set the boolean `divides` if integral division is not possible. |
| std::vector<unsigned> result; |
| result.reserve(superShape.size()); |
| bool divides = true; |
| auto divide = [÷s, &result](int superSize, int subSize) { |
| assert(superSize > 0 && "superSize must be > 0"); |
| assert(subSize > 0 && "subSize must be > 0"); |
| divides &= (superSize % subSize == 0); |
| result.push_back(superSize / subSize); |
| }; |
| functional::zipApply( |
| divide, SmallVector<int64_t, 8>{superShape.rbegin(), superShape.rend()}, |
| SmallVector<int64_t, 8>{subShape.rbegin(), subShape.rend()}); |
| |
| // If integral division does not occur, return and let the caller decide. |
| if (!divides) { |
| return None; |
| } |
| |
| // At this point we computed the ratio (in reverse) for the common |
| // size. Fill with the remaining entries from the super-vector shape (still in |
| // reverse). |
| int commonSize = subShape.size(); |
| std::copy(superShape.rbegin() + commonSize, superShape.rend(), |
| std::back_inserter(result)); |
| |
| assert(result.size() == superShape.size() && |
| "super to sub shape ratio is not of the same size as the super rank"); |
| |
| // Reverse again to get it back in the proper order and return. |
| return SmallVector<unsigned, 4>{result.rbegin(), result.rend()}; |
| } |
| |
| Optional<SmallVector<unsigned, 4>> mlir::shapeRatio(VectorType superVectorType, |
| VectorType subVectorType) { |
| assert(superVectorType.getElementType() == subVectorType.getElementType() && |
| "vector types must be of the same elemental type"); |
| return shapeRatio(superVectorType.getShape(), subVectorType.getShape()); |
| } |
| |
| /// Constructs a permutation map from memref indices to vector dimension. |
| /// |
| /// The implementation uses the knowledge of the mapping of enclosing loop to |
| /// vector dimension. `enclosingLoopToVectorDim` carries this information as a |
| /// map with: |
| /// - keys representing "vectorized enclosing loops"; |
| /// - values representing the corresponding vector dimension. |
| /// The algorithm traverses "vectorized enclosing loops" and extracts the |
| /// at-most-one MemRef index that is invariant along said loop. This index is |
| /// guaranteed to be at most one by construction: otherwise the MemRef is not |
| /// vectorizable. |
| /// If this invariant index is found, it is added to the permutation_map at the |
| /// proper vector dimension. |
| /// If no index is found to be invariant, 0 is added to the permutation_map and |
| /// corresponds to a vector broadcast along that dimension. |
| /// |
| /// Returns an empty AffineMap if `enclosingLoopToVectorDim` is empty, |
| /// signalling that no permutation map can be constructed given |
| /// `enclosingLoopToVectorDim`. |
| /// |
| /// Examples can be found in the documentation of `makePermutationMap`, in the |
| /// header file. |
| static AffineMap makePermutationMap( |
| Operation::operand_range operands, |
| const DenseMap<Operation *, unsigned> &enclosingLoopToVectorDim) { |
| if (enclosingLoopToVectorDim.empty()) |
| return AffineMap(); |
| MLIRContext *context = |
| enclosingLoopToVectorDim.begin()->getFirst()->getContext(); |
| using functional::makePtrDynCaster; |
| using functional::map; |
| SmallVector<Value *, 8> indices(operands); |
| SmallVector<AffineExpr, 4> perm(enclosingLoopToVectorDim.size(), |
| getAffineConstantExpr(0, context)); |
| |
| for (auto kvp : enclosingLoopToVectorDim) { |
| assert(kvp.second < perm.size()); |
| auto invariants = getInvariantAccesses( |
| cast<AffineForOp>(kvp.first).getInductionVar(), indices); |
| unsigned numIndices = indices.size(); |
| unsigned countInvariantIndices = 0; |
| for (unsigned dim = 0; dim < numIndices; ++dim) { |
| if (!invariants.count(indices[dim])) { |
| assert(perm[kvp.second] == getAffineConstantExpr(0, context) && |
| "permutationMap already has an entry along dim"); |
| perm[kvp.second] = getAffineDimExpr(dim, context); |
| } else { |
| ++countInvariantIndices; |
| } |
| } |
| assert((countInvariantIndices == numIndices || |
| countInvariantIndices == numIndices - 1) && |
| "Vectorization prerequisite violated: at most 1 index may be " |
| "invariant wrt a vectorized loop"); |
| } |
| return AffineMap::get(indices.size(), 0, perm, {}); |
| } |
| |
| /// Implementation detail that walks up the parents and records the ones with |
| /// the specified type. |
| /// TODO(ntv): could also be implemented as a collect parents followed by a |
| /// filter and made available outside this file. |
| template <typename T> |
| static SetVector<Operation *> getParentsOfType(Operation *op) { |
| SetVector<Operation *> res; |
| auto *current = op; |
| while (auto *parent = current->getParentOp()) { |
| if (auto typedParent = dyn_cast<T>(parent)) { |
| assert(res.count(parent) == 0 && "Already inserted"); |
| res.insert(parent); |
| } |
| current = parent; |
| } |
| return res; |
| } |
| |
| /// Returns the enclosing AffineForOp, from closest to farthest. |
| static SetVector<Operation *> getEnclosingforOps(Operation *op) { |
| return getParentsOfType<AffineForOp>(op); |
| } |
| |
| AffineMap mlir::makePermutationMap( |
| Operation *op, const DenseMap<Operation *, unsigned> &loopToVectorDim) { |
| DenseMap<Operation *, unsigned> enclosingLoopToVectorDim; |
| auto enclosingLoops = getEnclosingforOps(op); |
| for (auto *forInst : enclosingLoops) { |
| auto it = loopToVectorDim.find(forInst); |
| if (it != loopToVectorDim.end()) { |
| enclosingLoopToVectorDim.insert(*it); |
| } |
| } |
| |
| if (auto load = dyn_cast<LoadOp>(op)) { |
| return ::makePermutationMap(load.getIndices(), enclosingLoopToVectorDim); |
| } |
| |
| auto store = cast<StoreOp>(op); |
| return ::makePermutationMap(store.getIndices(), enclosingLoopToVectorDim); |
| } |
| |
| bool mlir::matcher::operatesOnSuperVectorsOf(Operation &op, |
| VectorType subVectorType) { |
| // First, extract the vector type and ditinguish between: |
| // a. ops that *must* lower a super-vector (i.e. vector.transfer_read, |
| // vector.transfer_write); and |
| // b. ops that *may* lower a super-vector (all other ops). |
| // The ops that *may* lower a super-vector only do so if the super-vector to |
| // sub-vector ratio exists. The ops that *must* lower a super-vector are |
| // explicitly checked for this property. |
| /// TODO(ntv): there should be a single function for all ops to do this so we |
| /// do not have to special case. Maybe a trait, or just a method, unclear atm. |
| bool mustDivide = false; |
| VectorType superVectorType; |
| if (auto read = dyn_cast<VectorTransferReadOp>(op)) { |
| superVectorType = read.getResultType(); |
| mustDivide = true; |
| } else if (auto write = dyn_cast<VectorTransferWriteOp>(op)) { |
| superVectorType = write.getVectorType(); |
| mustDivide = true; |
| } else if (op.getNumResults() == 0) { |
| if (!isa<ReturnOp>(op)) { |
| op.emitError("NYI: assuming only return operations can have 0 " |
| " results at this point"); |
| } |
| return false; |
| } else if (op.getNumResults() == 1) { |
| if (auto v = op.getResult(0)->getType().dyn_cast<VectorType>()) { |
| superVectorType = v; |
| } else { |
| // Not a vector type. |
| return false; |
| } |
| } else { |
| // Not a vector.transfer and has more than 1 result, fail hard for now to |
| // wake us up when something changes. |
| op.emitError("NYI: operation has more than 1 result"); |
| return false; |
| } |
| |
| // Get the ratio. |
| auto ratio = shapeRatio(superVectorType, subVectorType); |
| |
| // Sanity check. |
| assert((ratio.hasValue() || !mustDivide) && |
| "vector.transfer operation in which super-vector size is not an" |
| " integer multiple of sub-vector size"); |
| |
| // This catches cases that are not strictly necessary to have multiplicity but |
| // still aren't divisible by the sub-vector shape. |
| // This could be useful information if we wanted to reshape at the level of |
| // the vector type (but we would have to look at the compute and distinguish |
| // between parallel, reduction and possibly other cases. |
| if (!ratio.hasValue()) { |
| return false; |
| } |
| |
| return true; |
| } |