Blame - mlir/lib/Transforms/LoopFusion.cpp - external/github.com/llvm/llvm-project.git

blob: c757ea8e58b741992bc2c27962937227eee2c9c2 [file] [log] [blame]

MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	1	//===- LoopFusion.cpp - Code to perform loop fusion -----------------------===//
				2	//
				3	// Copyright 2019 The MLIR Authors.
				4	//
				5	// Licensed under the Apache License, Version 2.0 (the "License");
				6	// you may not use this file except in compliance with the License.
				7	// You may obtain a copy of the License at
				8	//
				9	// http://www.apache.org/licenses/LICENSE-2.0
				10	//
				11	// Unless required by applicable law or agreed to in writing, software
				12	// distributed under the License is distributed on an "AS IS" BASIS,
				13	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	// See the License for the specific language governing permissions and
				15	// limitations under the License.
				16	// =============================================================================
				17	//
				18	// This file implements loop fusion.
				19	//
				20	//===----------------------------------------------------------------------===//
				21
River Riddle	7555383	2019-01-29 05:23:53	[diff] [blame]	22	#include "mlir/AffineOps/AffineOps.h"
MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	23	#include "mlir/Analysis/AffineAnalysis.h"
Uday Bondhugula	dfe07b7	2019-02-23 00:51:08	[diff] [blame]	24	#include "mlir/Analysis/AffineStructures.h"
MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	25	#include "mlir/Analysis/LoopAnalysis.h"
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	26	#include "mlir/Analysis/Utils.h"
MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	27	#include "mlir/IR/AffineExpr.h"
				28	#include "mlir/IR/AffineMap.h"
				29	#include "mlir/IR/Builders.h"
River Riddle	48ccae2	2019-02-20 01:17:46	[diff] [blame]	30	#include "mlir/Pass/Pass.h"
Lei Zhang	85d9b6c	2019-03-01 21:48:24	[diff] [blame]	31	#include "mlir/StandardOps/Ops.h"
MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	32	#include "mlir/Transforms/LoopUtils.h"
				33	#include "mlir/Transforms/Passes.h"
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	34	#include "mlir/Transforms/Utils.h"
MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	35	#include "llvm/ADT/DenseMap.h"
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	36	#include "llvm/ADT/DenseSet.h"
				37	#include "llvm/ADT/SetVector.h"
MLIR Team	4eef795	2018-12-21 19:06:23	[diff] [blame]	38	#include "llvm/Support/CommandLine.h"
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	39	#include "llvm/Support/Debug.h"
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	40	#include "llvm/Support/raw_ostream.h"
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	41	#include <iomanip>
Jacques Pienaar	57270a9	2019-03-19 15:45:06	[diff] [blame]	42	#include <sstream>
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	43
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	44	#define DEBUG_TYPE "loop-fusion"
				45
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	46	using llvm::SetVector;
MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	47
				48	using namespace mlir;
				49
River Riddle	75c21e1	2019-01-26 06:14:04	[diff] [blame]	50	static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
				51
Uday Bondhugula	ce7e5953	2019-03-08 17:21:52	[diff] [blame]	52	/// Disables fusion profitability check and fuses if valid. Ignore any
				53	/// additional (redundant) computation tolerance threshold
				54	/// that would have prevented fusion.
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	55	static llvm::cl::opt<bool>
Uday Bondhugula	eee8536	2019-03-02 01:42:13	[diff] [blame]	56	clMaximalLoopFusion("fusion-maximal",
River Riddle	75c21e1	2019-01-26 06:14:04	[diff] [blame]	57	llvm::cl::desc("Enables maximal loop fusion"),
				58	llvm::cl::cat(clOptionsCategory));
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	59
				60	/// A threshold in percent of additional computation allowed when fusing.
				61	static llvm::cl::opt<double> clFusionAddlComputeTolerance(
Uday Bondhugula	eee8536	2019-03-02 01:42:13	[diff] [blame]	62	"fusion-compute-tolerance",
Uday Bondhugula	a1dad3a	2019-02-20 02:17:19	[diff] [blame]	63	llvm::cl::desc("Fractional increase in additional "
				64	"computation tolerated while fusing"),
River Riddle	75c21e1	2019-01-26 06:14:04	[diff] [blame]	65	llvm::cl::cat(clOptionsCategory));
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	66
Uday Bondhugula	8be2627	2019-02-02 01:06:22	[diff] [blame]	67	static llvm::cl::opt<unsigned> clFusionFastMemorySpace(
Uday Bondhugula	eee8536	2019-03-02 01:42:13	[diff] [blame]	68	"fusion-fast-mem-space",
Uday Bondhugula	8be2627	2019-02-02 01:06:22	[diff] [blame]	69	llvm::cl::desc("Faster memory space number to promote fusion buffers to"),
				70	llvm::cl::cat(clOptionsCategory));
				71
Uday Bondhugula	ce7e5953	2019-03-08 17:21:52	[diff] [blame]	72	// A local buffer of size less than or equal to this size is automatically
				73	// promoted to fast memory after producer-consumer fusion.
Uday Bondhugula	d4b3ff1	2019-02-27 00:10:19	[diff] [blame]	74	static llvm::cl::opt<unsigned long long> clFusionLocalBufThreshold(
Uday Bondhugula	eee8536	2019-03-02 01:42:13	[diff] [blame]	75	"fusion-local-buf-threshold",
Uday Bondhugula	d4b3ff1	2019-02-27 00:10:19	[diff] [blame]	76	llvm::cl::desc("Threshold size (KiB) for promoting local buffers to fast "
Uday Bondhugula	8be2627	2019-02-02 01:06:22	[diff] [blame]	77	"memory space"),
				78	llvm::cl::cat(clOptionsCategory));
				79
MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	80	namespace {
				81
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	82	/// Loop fusion pass. This pass currently supports a greedy fusion policy,
				83	/// which fuses loop nests with single-writer/single-reader memref dependences
				84	/// with the goal of improving locality.
				85
				86	// TODO(andydavis) Support fusion of source loop nests which write to multiple
				87	// memrefs, where each memref can have multiple users (if profitable).
MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	88	// TODO(andydavis) Extend this pass to check for fusion preventing dependences,
				89	// and add support for more general loop fusion algorithms.
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	90
River Riddle	c6c5344	2019-02-27 18:59:29	[diff] [blame]	91	struct LoopFusion : public FunctionPass<LoopFusion> {
Uday Bondhugula	ce7e5953	2019-03-08 17:21:52	[diff] [blame]	92	LoopFusion(unsigned fastMemorySpace = 0, uint64_t localBufSizeThreshold = 0,
				93	bool maximalFusion = false)
River Riddle	c6c5344	2019-02-27 18:59:29	[diff] [blame]	94	: localBufSizeThreshold(localBufSizeThreshold),
Uday Bondhugula	ce7e5953	2019-03-08 17:21:52	[diff] [blame]	95	fastMemorySpace(fastMemorySpace), maximalFusion(maximalFusion) {}
MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	96
River Riddle	ed5fe20	2019-02-28 22:50:42	[diff] [blame]	97	void runOnFunction() override;
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	98
Uday Bondhugula	d4b3ff1	2019-02-27 00:10:19	[diff] [blame]	99	// Any local buffers smaller than this size (in bytes) will be created in
Uday Bondhugula	8be2627	2019-02-02 01:06:22	[diff] [blame]	100	// `fastMemorySpace` if provided.
Uday Bondhugula	d4b3ff1	2019-02-27 00:10:19	[diff] [blame]	101	uint64_t localBufSizeThreshold;
Uday Bondhugula	8be2627	2019-02-02 01:06:22	[diff] [blame]	102	Optional<unsigned> fastMemorySpace = None;
Uday Bondhugula	ce7e5953	2019-03-08 17:21:52	[diff] [blame]	103	// If true, ignore any additional (redundant) computation tolerance threshold
				104	// that would have prevented fusion.
				105	bool maximalFusion;
Uday Bondhugula	8be2627	2019-02-02 01:06:22	[diff] [blame]	106
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	107	// The amount of additional computation that is tolerated while fusing
				108	// pair-wise as a fraction of the total computation.
				109	constexpr static double kComputeToleranceThreshold = 0.30f;
MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	110	};
				111
MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	112	} // end anonymous namespace
				113
River Riddle	c6c5344	2019-02-27 18:59:29	[diff] [blame]	114	FunctionPassBase *mlir::createLoopFusionPass(unsigned fastMemorySpace,
Uday Bondhugula	ce7e5953	2019-03-08 17:21:52	[diff] [blame]	115	uint64_t localBufSizeThreshold,
				116	bool maximalFusion) {
				117	return new LoopFusion(fastMemorySpace, localBufSizeThreshold, maximalFusion);
Uday Bondhugula	d4b3ff1	2019-02-27 00:10:19	[diff] [blame]	118	}
MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	119
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	120	namespace {
MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	121
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	122	// LoopNestStateCollector walks loop nests and collects load and store
Chris Lattner	456ad6a	2018-12-29 00:05:35	[diff] [blame]	123	// operations, and whether or not an IfInst was encountered in the loop nest.
River Riddle	bf9c381	2019-02-05 00:24:44	[diff] [blame]	124	struct LoopNestStateCollector {
Chris Lattner	d9b5bc8	2019-03-25 02:53:05	[diff] [blame]	125	SmallVector<AffineForOp, 4> forOps;
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	126	SmallVector<Instruction *, 4> loadOpInsts;
				127	SmallVector<Instruction *, 4> storeOpInsts;
River Riddle	7555383	2019-01-29 05:23:53	[diff] [blame]	128	bool hasNonForRegion = false;
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	129
River Riddle	bf9c381	2019-02-05 00:24:44	[diff] [blame]	130	void collect(Instruction *instToWalk) {
				131	instToWalk->walk([&](Instruction *opInst) {
				132	if (opInst->isa<AffineForOp>())
				133	forOps.push_back(opInst->cast<AffineForOp>());
Alex Zinenko	276fae1	2019-03-14 17:38:44	[diff] [blame]	134	else if (opInst->getNumRegions() != 0)
River Riddle	bf9c381	2019-02-05 00:24:44	[diff] [blame]	135	hasNonForRegion = true;
				136	else if (opInst->isa<LoadOp>())
				137	loadOpInsts.push_back(opInst);
				138	else if (opInst->isa<StoreOp>())
				139	storeOpInsts.push_back(opInst);
				140	});
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	141	}
				142	};
				143
MLIR Team	71495d5	2019-01-22 21:23:37	[diff] [blame]	144	// TODO(b/117228571) Replace when this is modeled through side-effects/op traits
Chris Lattner	986310a	2019-03-23 22:09:06	[diff] [blame]	145	static bool isMemRefDereferencingOp(Instruction &op) {
MLIR Team	71495d5	2019-01-22 21:23:37	[diff] [blame]	146	if (op.isa<LoadOp>() \|\| op.isa<StoreOp>() \|\| op.isa<DmaStartOp>() \|\|
				147	op.isa<DmaWaitOp>())
				148	return true;
				149	return false;
				150	}
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	151
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	152	// MemRefDependenceGraph is a graph data structure where graph nodes are
Chris Lattner	456ad6a	2018-12-29 00:05:35	[diff] [blame]	153	// top-level instructions in a Function which contain load/store ops, and edges
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	154	// are memref dependences between the nodes.
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	155	// TODO(andydavis) Add a more flexible dependence graph representation.
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	156	// TODO(andydavis) Add a depth parameter to dependence graph construction.
				157	struct MemRefDependenceGraph {
				158	public:
				159	// Node represents a node in the graph. A Node is either an entire loop nest
				160	// rooted at the top level which contains loads/stores, or a top level
				161	// load/store.
				162	struct Node {
				163	// The unique identifier of this node in the graph.
				164	unsigned id;
				165	// The top-level statment which is (or contains) loads/stores.
Chris Lattner	456ad6a	2018-12-29 00:05:35	[diff] [blame]	166	Instruction *inst;
Chris Lattner	5187cfc	2018-12-28 05:21:41	[diff] [blame]	167	// List of load operations.
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	168	SmallVector<Instruction *, 4> loads;
Chris Lattner	456ad6a	2018-12-29 00:05:35	[diff] [blame]	169	// List of store op insts.
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	170	SmallVector<Instruction *, 4> stores;
Chris Lattner	456ad6a	2018-12-29 00:05:35	[diff] [blame]	171	Node(unsigned id, Instruction *inst) : id(id), inst(inst) {}
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	172
				173	// Returns the load op count for 'memref'.
Chris Lattner	3f19031	2018-12-27 22:35:10	[diff] [blame]	174	unsigned getLoadOpCount(Value *memref) {
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	175	unsigned loadOpCount = 0;
Chris Lattner	456ad6a	2018-12-29 00:05:35	[diff] [blame]	176	for (auto *loadOpInst : loads) {
				177	if (memref == loadOpInst->cast<LoadOp>()->getMemRef())
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	178	++loadOpCount;
				179	}
				180	return loadOpCount;
				181	}
				182
				183	// Returns the store op count for 'memref'.
Chris Lattner	3f19031	2018-12-27 22:35:10	[diff] [blame]	184	unsigned getStoreOpCount(Value *memref) {
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	185	unsigned storeOpCount = 0;
Chris Lattner	456ad6a	2018-12-29 00:05:35	[diff] [blame]	186	for (auto *storeOpInst : stores) {
				187	if (memref == storeOpInst->cast<StoreOp>()->getMemRef())
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	188	++storeOpCount;
				189	}
				190	return storeOpCount;
				191	}
MLIR Team	58aa383	2019-02-16 01:12:19	[diff] [blame]	192
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	193	// Returns all store ops in 'storeOps' which access 'memref'.
MLIR Team	58aa383	2019-02-16 01:12:19	[diff] [blame]	194	void getStoreOpsForMemref(Value *memref,
				195	SmallVectorImpl<Instruction > storeOps) {
				196	for (auto *storeOpInst : stores) {
				197	if (memref == storeOpInst->cast<StoreOp>()->getMemRef())
				198	storeOps->push_back(storeOpInst);
				199	}
				200	}
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	201
				202	// Returns all load ops in 'loadOps' which access 'memref'.
				203	void getLoadOpsForMemref(Value *memref,
				204	SmallVectorImpl<Instruction > loadOps) {
				205	for (auto *loadOpInst : loads) {
				206	if (memref == loadOpInst->cast<LoadOp>()->getMemRef())
				207	loadOps->push_back(loadOpInst);
				208	}
				209	}
				210
				211	// Returns all memrefs in 'loadAndStoreMemrefSet' for which this node
				212	// has at least one load and store operation.
				213	void getLoadAndStoreMemrefSet(DenseSet<Value > loadAndStoreMemrefSet) {
				214	llvm::SmallDenseSet<Value *, 2> loadMemrefs;
				215	for (auto *loadOpInst : loads) {
				216	loadMemrefs.insert(loadOpInst->cast<LoadOp>()->getMemRef());
				217	}
				218	for (auto *storeOpInst : stores) {
				219	auto *memref = storeOpInst->cast<StoreOp>()->getMemRef();
				220	if (loadMemrefs.count(memref) > 0)
				221	loadAndStoreMemrefSet->insert(memref);
				222	}
				223	}
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	224	};
				225
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	226	// Edge represents a data dependece between nodes in the graph.
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	227	struct Edge {
				228	// The id of the node at the other end of the edge.
MLIR Team	1e85191	2019-01-31 00:01:46	[diff] [blame]	229	// If this edge is stored in Edge = Node.inEdges[i], then
				230	// 'Node.inEdges[i].id' is the identifier of the source node of the edge.
				231	// If this edge is stored in Edge = Node.outEdges[i], then
				232	// 'Node.outEdges[i].id' is the identifier of the dest node of the edge.
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	233	unsigned id;
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	234	// The SSA value on which this edge represents a dependence.
				235	// If the value is a memref, then the dependence is between graph nodes
				236	// which contain accesses to the same memref 'value'. If the value is a
				237	// non-memref value, then the dependence is between a graph node which
				238	// defines an SSA value and another graph node which uses the SSA value
				239	// (e.g. a constant instruction defining a value which is used inside a loop
				240	// nest).
				241	Value *value;
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	242	};
				243
				244	// Map from node id to Node.
				245	DenseMap<unsigned, Node> nodes;
				246	// Map from node id to list of input edges.
				247	DenseMap<unsigned, SmallVector<Edge, 2>> inEdges;
				248	// Map from node id to list of output edges.
				249	DenseMap<unsigned, SmallVector<Edge, 2>> outEdges;
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	250	// Map from memref to a count on the dependence edges associated with that
				251	// memref.
				252	DenseMap<Value *, unsigned> memrefEdgeCount;
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	253	// The next unique identifier to use for newly created graph nodes.
				254	unsigned nextNodeId = 0;
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	255
				256	MemRefDependenceGraph() {}
				257
				258	// Initializes the dependence graph based on operations in 'f'.
				259	// Returns true on success, false otherwise.
Chris Lattner	69d9e99	2018-12-28 16:48:09	[diff] [blame]	260	bool init(Function *f);
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	261
				262	// Returns the graph node for 'id'.
				263	Node *getNode(unsigned id) {
				264	auto it = nodes.find(id);
				265	assert(it != nodes.end());
				266	return &it->second;
				267	}
				268
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	269	// Adds a node with 'inst' to the graph and returns its unique identifier.
				270	unsigned addNode(Instruction *inst) {
				271	Node node(nextNodeId++, inst);
				272	nodes.insert({node.id, node});
				273	return node.id;
				274	}
				275
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	276	// Remove node 'id' (and its associated edges) from graph.
				277	void removeNode(unsigned id) {
				278	// Remove each edge in 'inEdges[id]'.
				279	if (inEdges.count(id) > 0) {
				280	SmallVector<Edge, 2> oldInEdges = inEdges[id];
				281	for (auto &inEdge : oldInEdges) {
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	282	removeEdge(inEdge.id, id, inEdge.value);
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	283	}
				284	}
				285	// Remove each edge in 'outEdges[id]'.
				286	if (outEdges.count(id) > 0) {
				287	SmallVector<Edge, 2> oldOutEdges = outEdges[id];
				288	for (auto &outEdge : oldOutEdges) {
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	289	removeEdge(id, outEdge.id, outEdge.value);
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	290	}
				291	}
				292	// Erase remaining node state.
				293	inEdges.erase(id);
				294	outEdges.erase(id);
				295	nodes.erase(id);
				296	}
				297
MLIR Team	d7c8244	2019-01-30 23:53:41	[diff] [blame]	298	// Returns true if node 'id' writes to any memref which escapes (or is an
				299	// argument to) the function/block. Returns false otherwise.
				300	bool writesToLiveInOrEscapingMemrefs(unsigned id) {
MLIR Team	71495d5	2019-01-22 21:23:37	[diff] [blame]	301	Node *node = getNode(id);
				302	for (auto *storeOpInst : node->stores) {
				303	auto *memref = storeOpInst->cast<StoreOp>()->getMemRef();
				304	auto *inst = memref->getDefiningInst();
MLIR Team	58aa383	2019-02-16 01:12:19	[diff] [blame]	305	// Return true if 'memref' is a block argument.
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	306	if (!inst)
MLIR Team	d7c8244	2019-01-30 23:53:41	[diff] [blame]	307	return true;
MLIR Team	58aa383	2019-02-16 01:12:19	[diff] [blame]	308	// Return true if any use of 'memref' escapes the function.
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	309	for (auto &use : memref->getUses())
				310	if (!isMemRefDereferencingOp(*use.getOwner()))
MLIR Team	d7c8244	2019-01-30 23:53:41	[diff] [blame]	311	return true;
MLIR Team	d7c8244	2019-01-30 23:53:41	[diff] [blame]	312	}
				313	return false;
				314	}
				315
				316	// Returns true if node 'id' can be removed from the graph. Returns false
				317	// otherwise. A node can be removed from the graph iff the following
				318	// conditions are met:
				319	// *) The node does not write to any memref which escapes (or is a
				320	// function/block argument).
				321	// *) The node has no successors in the dependence graph.
				322	bool canRemoveNode(unsigned id) {
				323	if (writesToLiveInOrEscapingMemrefs(id))
				324	return false;
				325	Node *node = getNode(id);
				326	for (auto *storeOpInst : node->stores) {
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	327	// Return false if there exist out edges from 'id' on 'memref'.
MLIR Team	d7c8244	2019-01-30 23:53:41	[diff] [blame]	328	if (getOutEdgeCount(id, storeOpInst->cast<StoreOp>()->getMemRef()) > 0)
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	329	return false;
MLIR Team	71495d5	2019-01-22 21:23:37	[diff] [blame]	330	}
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	331	return true;
MLIR Team	71495d5	2019-01-22 21:23:37	[diff] [blame]	332	}
				333
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	334	// Returns true iff there is an edge from node 'srcId' to node 'dstId' which
				335	// is for 'value' if non-null, or for any value otherwise. Returns false
				336	// otherwise.
				337	bool hasEdge(unsigned srcId, unsigned dstId, Value *value = nullptr) {
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	338	if (outEdges.count(srcId) == 0 \|\| inEdges.count(dstId) == 0) {
				339	return false;
				340	}
				341	bool hasOutEdge = llvm::any_of(outEdges[srcId], [=](Edge &edge) {
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	342	return edge.id == dstId && (!value \|\| edge.value == value);
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	343	});
				344	bool hasInEdge = llvm::any_of(inEdges[dstId], [=](Edge &edge) {
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	345	return edge.id == srcId && (!value \|\| edge.value == value);
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	346	});
				347	return hasOutEdge && hasInEdge;
				348	}
				349
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	350	// Adds an edge from node 'srcId' to node 'dstId' for 'value'.
				351	void addEdge(unsigned srcId, unsigned dstId, Value *value) {
				352	if (!hasEdge(srcId, dstId, value)) {
				353	outEdges[srcId].push_back({dstId, value});
				354	inEdges[dstId].push_back({srcId, value});
				355	if (value->getType().isa<MemRefType>())
				356	memrefEdgeCount[value]++;
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	357	}
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	358	}
				359
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	360	// Removes an edge from node 'srcId' to node 'dstId' for 'value'.
				361	void removeEdge(unsigned srcId, unsigned dstId, Value *value) {
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	362	assert(inEdges.count(dstId) > 0);
				363	assert(outEdges.count(srcId) > 0);
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	364	if (value->getType().isa<MemRefType>()) {
				365	assert(memrefEdgeCount.count(value) > 0);
				366	memrefEdgeCount[value]--;
				367	}
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	368	// Remove 'srcId' from 'inEdges[dstId]'.
				369	for (auto it = inEdges[dstId].begin(); it != inEdges[dstId].end(); ++it) {
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	370	if ((it).id == srcId && (it).value == value) {
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	371	inEdges[dstId].erase(it);
				372	break;
				373	}
				374	}
				375	// Remove 'dstId' from 'outEdges[srcId]'.
				376	for (auto it = outEdges[srcId].begin(); it != outEdges[srcId].end(); ++it) {
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	377	if ((it).id == dstId && (it).value == value) {
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	378	outEdges[srcId].erase(it);
				379	break;
				380	}
				381	}
				382	}
				383
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	384	// Returns true if there is a path in the dependence graph from node 'srcId'
				385	// to node 'dstId'. Returns false otherwise.
				386	bool hasDependencePath(unsigned srcId, unsigned dstId) {
				387	// Worklist state is: <node-id, next-output-edge-index-to-visit>
				388	SmallVector<std::pair<unsigned, unsigned>, 4> worklist;
				389	worklist.push_back({srcId, 0});
				390	// Run DFS traversal to see if 'dstId' is reachable from 'srcId'.
				391	while (!worklist.empty()) {
				392	auto &idAndIndex = worklist.back();
				393	// Return true if we have reached 'dstId'.
				394	if (idAndIndex.first == dstId)
				395	return true;
				396	// Pop and continue if node has no out edges, or if all out edges have
				397	// already been visited.
				398	if (outEdges.count(idAndIndex.first) == 0 \|\|
				399	idAndIndex.second == outEdges[idAndIndex.first].size()) {
				400	worklist.pop_back();
				401	continue;
				402	}
				403	// Get graph edge to traverse.
				404	Edge edge = outEdges[idAndIndex.first][idAndIndex.second];
				405	// Increment next output edge index for 'idAndIndex'.
				406	++idAndIndex.second;
				407	// Add node at 'edge.id' to worklist.
				408	worklist.push_back({edge.id, 0});
				409	}
				410	return false;
				411	}
				412
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	413	// Returns the input edge count for node 'id' and 'memref' from src nodes
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	414	// which access 'memref' with a store operation.
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	415	unsigned getIncomingMemRefAccesses(unsigned id, Value *memref) {
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	416	unsigned inEdgeCount = 0;
				417	if (inEdges.count(id) > 0)
				418	for (auto &inEdge : inEdges[id])
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	419	if (inEdge.value == memref) {
				420	Node *srcNode = getNode(inEdge.id);
				421	// Only count in edges from 'srcNode' if 'srcNode' accesses 'memref'
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	422	if (srcNode->getStoreOpCount(memref) > 0)
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	423	++inEdgeCount;
				424	}
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	425	return inEdgeCount;
				426	}
				427
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	428	// Returns the output edge count for node 'id' and 'memref' (if non-null),
				429	// otherwise returns the total output edge count from node 'id'.
				430	unsigned getOutEdgeCount(unsigned id, Value *memref = nullptr) {
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	431	unsigned outEdgeCount = 0;
				432	if (outEdges.count(id) > 0)
				433	for (auto &outEdge : outEdges[id])
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	434	if (!memref \|\| outEdge.value == memref)
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	435	++outEdgeCount;
				436	return outEdgeCount;
				437	}
				438
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	439	// Computes and returns an insertion point instruction, before which the
				440	// the fused <srcId, dstId> loop nest can be inserted while preserving
				441	// dependences. Returns nullptr if no such insertion point is found.
MLIR Team	a78edcd	2019-02-05 14:57:08	[diff] [blame]	442	Instruction *getFusedLoopNestInsertionPoint(unsigned srcId, unsigned dstId) {
MLIR Team	5c5739d	2019-01-25 06:27:40	[diff] [blame]	443	if (outEdges.count(srcId) == 0)
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	444	return getNode(dstId)->inst;
				445
				446	// Build set of insts in range (srcId, dstId) which depend on 'srcId'.
				447	SmallPtrSet<Instruction *, 2> srcDepInsts;
				448	for (auto &outEdge : outEdges[srcId])
MLIR Team	a78edcd	2019-02-05 14:57:08	[diff] [blame]	449	if (outEdge.id != dstId)
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	450	srcDepInsts.insert(getNode(outEdge.id)->inst);
				451
				452	// Build set of insts in range (srcId, dstId) on which 'dstId' depends.
				453	SmallPtrSet<Instruction *, 2> dstDepInsts;
				454	for (auto &inEdge : inEdges[dstId])
MLIR Team	a78edcd	2019-02-05 14:57:08	[diff] [blame]	455	if (inEdge.id != srcId)
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	456	dstDepInsts.insert(getNode(inEdge.id)->inst);
				457
				458	Instruction *srcNodeInst = getNode(srcId)->inst;
				459	Instruction *dstNodeInst = getNode(dstId)->inst;
				460
				461	// Computing insertion point:
				462	// *) Walk all instruction positions in Block instruction list in the
				463	// range (src, dst). For each instruction 'inst' visited in this search:
				464	// *) Store in 'firstSrcDepPos' the first position where 'inst' has a
				465	// dependence edge from 'srcNode'.
				466	// *) Store in 'lastDstDepPost' the last position where 'inst' has a
				467	// dependence edge to 'dstNode'.
				468	// *) Compare 'firstSrcDepPos' and 'lastDstDepPost' to determine the
				469	// instruction insertion point (or return null pointer if no such
				470	// insertion point exists: 'firstSrcDepPos' <= 'lastDstDepPos').
				471	SmallVector<Instruction *, 2> depInsts;
				472	Optional<unsigned> firstSrcDepPos;
				473	Optional<unsigned> lastDstDepPos;
				474	unsigned pos = 0;
				475	for (Block::iterator it = std::next(Block::iterator(srcNodeInst));
				476	it != Block::iterator(dstNodeInst); ++it) {
				477	Instruction inst = &(it);
				478	if (srcDepInsts.count(inst) > 0 && firstSrcDepPos == None)
				479	firstSrcDepPos = pos;
				480	if (dstDepInsts.count(inst) > 0)
				481	lastDstDepPos = pos;
				482	depInsts.push_back(inst);
				483	++pos;
MLIR Team	5c5739d	2019-01-25 06:27:40	[diff] [blame]	484	}
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	485
				486	if (firstSrcDepPos.hasValue()) {
				487	if (lastDstDepPos.hasValue()) {
				488	if (firstSrcDepPos.getValue() <= lastDstDepPos.getValue()) {
				489	// No valid insertion point exists which preserves dependences.
				490	return nullptr;
				491	}
				492	}
				493	// Return the insertion point at 'firstSrcDepPos'.
				494	return depInsts[firstSrcDepPos.getValue()];
				495	}
				496	// No dependence targets in range (or only dst deps in range), return
				497	// 'dstNodInst' insertion point.
				498	return dstNodeInst;
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	499	}
				500
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	501	// Updates edge mappings from node 'srcId' to node 'dstId' after 'oldMemRef'
				502	// has been replaced in node at 'dstId' by a private memref.
				503	void updateEdges(unsigned srcId, unsigned dstId, Value *oldMemRef) {
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	504	// For each edge in 'inEdges[srcId]': add new edge remaping to 'dstId'.
				505	if (inEdges.count(srcId) > 0) {
				506	SmallVector<Edge, 2> oldInEdges = inEdges[srcId];
				507	for (auto &inEdge : oldInEdges) {
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	508	// Add edge from 'inEdge.id' to 'dstId' if not for 'oldMemRef'.
				509	if (inEdge.value != oldMemRef)
				510	addEdge(inEdge.id, dstId, inEdge.value);
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	511	}
				512	}
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	513	// For each edge in 'outEdges[srcId]': remove edge from 'srcId' to 'dstId'.
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	514	if (outEdges.count(srcId) > 0) {
				515	SmallVector<Edge, 2> oldOutEdges = outEdges[srcId];
				516	for (auto &outEdge : oldOutEdges) {
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	517	// Remove any out edges from 'srcId' to 'dstId' across memrefs.
				518	if (outEdge.id == dstId)
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	519	removeEdge(srcId, outEdge.id, outEdge.value);
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	520	}
				521	}
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	522	// Remove any edges in 'inEdges[dstId]' on 'oldMemRef' (which is being
				523	// replaced by a private memref). These edges could come from nodes
				524	// other than 'srcId' which were removed in the previous step.
				525	if (inEdges.count(dstId) > 0) {
				526	SmallVector<Edge, 2> oldInEdges = inEdges[dstId];
				527	for (auto &inEdge : oldInEdges)
				528	if (inEdge.value == oldMemRef)
				529	removeEdge(inEdge.id, dstId, inEdge.value);
				530	}
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	531	}
				532
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	533	// Update edge mappings for nodes 'sibId' and 'dstId' to reflect fusion
				534	// of sibling node 'sidId' into node 'dstId'.
				535	void updateEdges(unsigned sibId, unsigned dstId) {
				536	// For each edge in 'inEdges[sibId]':
				537	// *) Add new edge from source node 'inEdge.id' to 'dstNode'.
				538	// *) Remove edge from source node 'inEdge.id' to 'sibNode'.
				539	if (inEdges.count(sibId) > 0) {
				540	SmallVector<Edge, 2> oldInEdges = inEdges[sibId];
				541	for (auto &inEdge : oldInEdges) {
				542	addEdge(inEdge.id, dstId, inEdge.value);
				543	removeEdge(inEdge.id, sibId, inEdge.value);
				544	}
				545	}
				546
				547	// For each edge in 'outEdges[sibId]' to node 'id'
				548	// *) Add new edge from 'dstId' to 'outEdge.id'.
				549	// *) Remove edge from 'sibId' to 'outEdge.id'.
				550	if (outEdges.count(sibId) > 0) {
				551	SmallVector<Edge, 2> oldOutEdges = outEdges[sibId];
				552	for (auto &outEdge : oldOutEdges) {
				553	addEdge(dstId, outEdge.id, outEdge.value);
				554	removeEdge(sibId, outEdge.id, outEdge.value);
				555	}
				556	}
				557	}
				558
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	559	// Adds ops in 'loads' and 'stores' to node at 'id'.
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	560	void addToNode(unsigned id, const SmallVectorImpl<Instruction *> &loads,
				561	const SmallVectorImpl<Instruction *> &stores) {
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	562	Node *node = getNode(id);
Chris Lattner	456ad6a	2018-12-29 00:05:35	[diff] [blame]	563	for (auto *loadOpInst : loads)
				564	node->loads.push_back(loadOpInst);
				565	for (auto *storeOpInst : stores)
				566	node->stores.push_back(storeOpInst);
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	567	}
				568
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	569	void clearNodeLoadAndStores(unsigned id) {
				570	Node *node = getNode(id);
				571	node->loads.clear();
				572	node->stores.clear();
				573	}
				574
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	575	// Calls 'callback' for each input edge incident to node 'id' which carries a
				576	// memref dependence.
				577	void forEachMemRefInputEdge(unsigned id,
				578	const std::function<void(Edge)> &callback) {
				579	if (inEdges.count(id) > 0)
				580	forEachMemRefEdge(inEdges[id], callback);
				581	}
				582	// Calls 'callback' for each output edge from node 'id' which carries a
				583	// memref dependence.
				584	void forEachMemRefOutputEdge(unsigned id,
				585	const std::function<void(Edge)> &callback) {
				586	if (outEdges.count(id) > 0)
				587	forEachMemRefEdge(outEdges[id], callback);
				588	}
				589	// Calls 'callback' for each edge in 'edges' which carries a memref
				590	// dependence.
				591	void forEachMemRefEdge(ArrayRef<Edge> edges,
				592	const std::function<void(Edge)> &callback) {
				593	for (auto &edge : edges) {
				594	// Skip if 'edge' is not a memref dependence edge.
				595	if (!edge.value->getType().isa<MemRefType>())
				596	continue;
				597	assert(nodes.count(edge.id) > 0);
				598	// Skip if 'edge.id' is not a loop nest.
				599	if (!getNode(edge.id)->inst->isa<AffineForOp>())
				600	continue;
				601	// Visit current input edge 'edge'.
				602	callback(edge);
				603	}
				604	}
				605
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	606	void print(raw_ostream &os) const {
				607	os << "\nMemRefDependenceGraph\n";
				608	os << "\nNodes:\n";
				609	for (auto &idAndNode : nodes) {
				610	os << "Node: " << idAndNode.first << "\n";
				611	auto it = inEdges.find(idAndNode.first);
				612	if (it != inEdges.end()) {
				613	for (const auto &e : it->second)
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	614	os << " InEdge: " << e.id << " " << e.value << "\n";
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	615	}
				616	it = outEdges.find(idAndNode.first);
				617	if (it != outEdges.end()) {
				618	for (const auto &e : it->second)
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	619	os << " OutEdge: " << e.id << " " << e.value << "\n";
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	620	}
				621	}
				622	}
				623	void dump() const { print(llvm::errs()); }
				624	};
				625
Chris Lattner	456ad6a	2018-12-29 00:05:35	[diff] [blame]	626	// Intializes the data dependence graph by walking instructions in 'f'.
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	627	// Assigns each node in the graph a node id based on program order in 'f'.
Chris Lattner	315a466	2018-12-28 21:07:39	[diff] [blame]	628	// TODO(andydavis) Add support for taking a Block arg to construct the
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	629	// dependence graph at a different depth.
Chris Lattner	69d9e99	2018-12-28 16:48:09	[diff] [blame]	630	bool MemRefDependenceGraph::init(Function *f) {
Chris Lattner	3f19031	2018-12-27 22:35:10	[diff] [blame]	631	DenseMap<Value *, SetVector<unsigned>> memrefAccesses;
Chris Lattner	dffc589	2018-12-29 23:33:43	[diff] [blame]	632
				633	// TODO: support multi-block functions.
				634	if (f->getBlocks().size() != 1)
				635	return false;
				636
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	637	DenseMap<Instruction *, unsigned> forToNodeMap;
Chris Lattner	dffc589	2018-12-29 23:33:43	[diff] [blame]	638	for (auto &inst : f->front()) {
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	639	if (auto forOp = inst.dyn_cast<AffineForOp>()) {
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	640	// Create graph node 'id' to represent top-level 'forOp' and record
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	641	// all loads and store accesses it contains.
				642	LoopNestStateCollector collector;
River Riddle	bf9c381	2019-02-05 00:24:44	[diff] [blame]	643	collector.collect(&inst);
River Riddle	832567b	2019-03-25 17:14:34	[diff] [blame]	644	// Return false if a non 'affine.for' region was found (not currently
				645	// supported).
River Riddle	7555383	2019-01-29 05:23:53	[diff] [blame]	646	if (collector.hasNonForRegion)
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	647	return false;
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	648	Node node(nextNodeId++, &inst);
Chris Lattner	456ad6a	2018-12-29 00:05:35	[diff] [blame]	649	for (auto *opInst : collector.loadOpInsts) {
				650	node.loads.push_back(opInst);
				651	auto *memref = opInst->cast<LoadOp>()->getMemRef();
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	652	memrefAccesses[memref].insert(node.id);
				653	}
Chris Lattner	456ad6a	2018-12-29 00:05:35	[diff] [blame]	654	for (auto *opInst : collector.storeOpInsts) {
				655	node.stores.push_back(opInst);
				656	auto *memref = opInst->cast<StoreOp>()->getMemRef();
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	657	memrefAccesses[memref].insert(node.id);
				658	}
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	659	forToNodeMap[&inst] = node.id;
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	660	nodes.insert({node.id, node});
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	661	} else if (auto loadOp = inst.dyn_cast<LoadOp>()) {
				662	// Create graph node for top-level load op.
				663	Node node(nextNodeId++, &inst);
				664	node.loads.push_back(&inst);
				665	auto *memref = inst.cast<LoadOp>()->getMemRef();
				666	memrefAccesses[memref].insert(node.id);
				667	nodes.insert({node.id, node});
				668	} else if (auto storeOp = inst.dyn_cast<StoreOp>()) {
				669	// Create graph node for top-level store op.
				670	Node node(nextNodeId++, &inst);
				671	node.stores.push_back(&inst);
				672	auto *memref = inst.cast<StoreOp>()->getMemRef();
				673	memrefAccesses[memref].insert(node.id);
				674	nodes.insert({node.id, node});
Alex Zinenko	276fae1	2019-03-14 17:38:44	[diff] [blame]	675	} else if (inst.getNumRegions() != 0) {
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	676	// Return false if another region is found (not currently supported).
				677	return false;
				678	} else if (inst.getNumResults() > 0 && !inst.use_empty()) {
				679	// Create graph node for top-level producer of SSA values, which
				680	// could be used by loop nest nodes.
				681	Node node(nextNodeId++, &inst);
				682	nodes.insert({node.id, node});
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	683	}
				684	}
				685
				686	// Add dependence edges between nodes which produce SSA values and their
				687	// users.
				688	for (auto &idAndNode : nodes) {
				689	const Node &node = idAndNode.second;
				690	if (!node.loads.empty() \|\| !node.stores.empty())
				691	continue;
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	692	auto *opInst = node.inst;
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	693	for (auto *value : opInst->getResults()) {
				694	for (auto &use : value->getUses()) {
Chris Lattner	d9b5bc8	2019-03-25 02:53:05	[diff] [blame]	695	SmallVector<AffineForOp, 4> loops;
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	696	getLoopIVs(*use.getOwner(), &loops);
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	697	if (loops.empty())
				698	continue;
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	699	assert(forToNodeMap.count(loops[0].getInstruction()) > 0);
				700	unsigned userLoopNestId = forToNodeMap[loops[0].getInstruction()];
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	701	addEdge(node.id, userLoopNestId, value);
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	702	}
				703	}
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	704	}
				705
				706	// Walk memref access lists and add graph edges between dependent nodes.
				707	for (auto &memrefAndList : memrefAccesses) {
				708	unsigned n = memrefAndList.second.size();
				709	for (unsigned i = 0; i < n; ++i) {
				710	unsigned srcId = memrefAndList.second[i];
				711	bool srcHasStore =
				712	getNode(srcId)->getStoreOpCount(memrefAndList.first) > 0;
				713	for (unsigned j = i + 1; j < n; ++j) {
				714	unsigned dstId = memrefAndList.second[j];
				715	bool dstHasStore =
				716	getNode(dstId)->getStoreOpCount(memrefAndList.first) > 0;
				717	if (srcHasStore \|\| dstHasStore)
				718	addEdge(srcId, dstId, memrefAndList.first);
				719	}
				720	}
				721	}
				722	return true;
				723	}
				724
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	725	namespace {
				726
				727	// LoopNestStats aggregates various per-loop statistics (eg. loop trip count
				728	// and operation count) for a loop nest up until the innermost loop body.
				729	struct LoopNestStats {
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	730	// Map from AffineForOp to immediate child AffineForOps in its loop body.
Chris Lattner	d9b5bc8	2019-03-25 02:53:05	[diff] [blame]	731	DenseMap<Instruction *, SmallVector<AffineForOp, 2>> loopMap;
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	732	// Map from AffineForOp to count of operations in its loop body.
				733	DenseMap<Instruction *, uint64_t> opCountMap;
				734	// Map from AffineForOp to its constant trip count.
				735	DenseMap<Instruction *, uint64_t> tripCountMap;
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	736	};
				737
				738	// LoopNestStatsCollector walks a single loop nest and gathers per-loop
				739	// trip count and operation count statistics and records them in 'stats'.
River Riddle	bf9c381	2019-02-05 00:24:44	[diff] [blame]	740	struct LoopNestStatsCollector {
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	741	LoopNestStats *stats;
				742	bool hasLoopWithNonConstTripCount = false;
				743
				744	LoopNestStatsCollector(LoopNestStats *stats) : stats(stats) {}
				745
River Riddle	bf9c381	2019-02-05 00:24:44	[diff] [blame]	746	void collect(Instruction *inst) {
Chris Lattner	d9b5bc8	2019-03-25 02:53:05	[diff] [blame]	747	inst->walk<AffineForOp>([&](AffineForOp forOp) {
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	748	auto *forInst = forOp.getInstruction();
				749	auto *parentInst = forOp.getInstruction()->getParentInst();
River Riddle	bf9c381	2019-02-05 00:24:44	[diff] [blame]	750	if (parentInst != nullptr) {
				751	assert(parentInst->isa<AffineForOp>() && "Expected parent AffineForOp");
				752	// Add mapping to 'forOp' from its parent AffineForOp.
				753	stats->loopMap[parentInst].push_back(forOp);
				754	}
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	755
River Riddle	bf9c381	2019-02-05 00:24:44	[diff] [blame]	756	// Record the number of op instructions in the body of 'forOp'.
				757	unsigned count = 0;
				758	stats->opCountMap[forInst] = 0;
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	759	for (auto &inst : *forOp.getBody()) {
Uday Bondhugula	d4b3ff1	2019-02-27 00:10:19	[diff] [blame]	760	if (!inst.isa<AffineForOp>() && !inst.isa<AffineIfOp>())
River Riddle	bf9c381	2019-02-05 00:24:44	[diff] [blame]	761	++count;
				762	}
				763	stats->opCountMap[forInst] = count;
				764	// Record trip count for 'forOp'. Set flag if trip count is not
				765	// constant.
				766	Optional<uint64_t> maybeConstTripCount = getConstantTripCount(forOp);
				767	if (!maybeConstTripCount.hasValue()) {
				768	hasLoopWithNonConstTripCount = true;
				769	return;
				770	}
				771	stats->tripCountMap[forInst] = maybeConstTripCount.getValue();
				772	});
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	773	}
				774	};
				775
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	776	// Computes the total cost of the loop nest rooted at 'forOp'.
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	777	// Currently, the total cost is computed by counting the total operation
				778	// instance count (i.e. total number of operations in the loop bodyloop
				779	// operation count * loop trip count) for the entire loop nest.
				780	// If 'tripCountOverrideMap' is non-null, overrides the trip count for loops
				781	// specified in the map when computing the total op instance count.
				782	// NOTE: this is used to compute the cost of computation slices, which are
				783	// sliced along the iteration dimension, and thus reduce the trip count.
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	784	// If 'computeCostMap' is non-null, the total op count for forOps specified
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	785	// in the map is increased (not overridden) by adding the op count from the
				786	// map to the existing op count for the for loop. This is done before
				787	// multiplying by the loop's trip count, and is used to model the cost of
				788	// inserting a sliced loop nest of known cost into the loop's body.
				789	// NOTE: this is used to compute the cost of fusing a slice of some loop nest
				790	// within another loop.
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	791	static int64_t getComputeCost(
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	792	Instruction forInst, LoopNestStats stats,
				793	llvm::SmallDenseMap<Instruction , uint64_t, 8> tripCountOverrideMap,
				794	DenseMap<Instruction , int64_t> computeCostMap) {
				795	// 'opCount' is the total number operations in one iteration of 'forOp' body
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	796	int64_t opCount = stats->opCountMap[forInst];
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	797	if (stats->loopMap.count(forInst) > 0) {
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	798	for (auto childForOp : stats->loopMap[forInst]) {
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	799	opCount += getComputeCost(childForOp.getInstruction(), stats,
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	800	tripCountOverrideMap, computeCostMap);
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	801	}
				802	}
				803	// Add in additional op instances from slice (if specified in map).
				804	if (computeCostMap != nullptr) {
				805	auto it = computeCostMap->find(forInst);
				806	if (it != computeCostMap->end()) {
				807	opCount += it->second;
				808	}
				809	}
				810	// Override trip count (if specified in map).
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	811	int64_t tripCount = stats->tripCountMap[forInst];
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	812	if (tripCountOverrideMap != nullptr) {
				813	auto it = tripCountOverrideMap->find(forInst);
				814	if (it != tripCountOverrideMap->end()) {
				815	tripCount = it->second;
				816	}
				817	}
				818	// Returns the total number of dynamic instances of operations in loop body.
				819	return tripCount * opCount;
				820	}
				821
				822	} // end anonymous namespace
				823
Uday Bondhugula	7aa60a3	2019-02-27 01:32:47	[diff] [blame]	824	// TODO(andydavis,b/126426796): extend this to handle multiple result maps.
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	825	static Optional<uint64_t> getConstDifference(AffineMap lbMap, AffineMap ubMap) {
Uday Bondhugula	c1ca23e	2019-01-16 21:13:00	[diff] [blame]	826	assert(lbMap.getNumResults() == 1 && "expected single result bound map");
				827	assert(ubMap.getNumResults() == 1 && "expected single result bound map");
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	828	assert(lbMap.getNumDims() == ubMap.getNumDims());
				829	assert(lbMap.getNumSymbols() == ubMap.getNumSymbols());
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	830	AffineExpr lbExpr(lbMap.getResult(0));
				831	AffineExpr ubExpr(ubMap.getResult(0));
				832	auto loopSpanExpr = simplifyAffineExpr(ubExpr - lbExpr, lbMap.getNumDims(),
				833	lbMap.getNumSymbols());
				834	auto cExpr = loopSpanExpr.dyn_cast<AffineConstantExpr>();
				835	if (!cExpr)
				836	return None;
				837	return cExpr.getValue();
				838	}
				839
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	840	// Builds a map 'tripCountMap' from AffineForOp to constant trip count for loop
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	841	// nest surrounding 'srcAccess' utilizing slice loop bounds in 'sliceState'.
				842	// Returns true on success, false otherwise (if a non-constant trip count
				843	// was encountered).
				844	// TODO(andydavis) Make this work with non-unit step loops.
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	845	static bool buildSliceTripCountMap(
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	846	Instruction srcOpInst, ComputationSliceState sliceState,
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	847	llvm::SmallDenseMap<Instruction , uint64_t, 8> tripCountMap) {
Chris Lattner	d9b5bc8	2019-03-25 02:53:05	[diff] [blame]	848	SmallVector<AffineForOp, 4> srcLoopIVs;
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	849	getLoopIVs(*srcOpInst, &srcLoopIVs);
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	850	unsigned numSrcLoopIVs = srcLoopIVs.size();
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	851	// Populate map from AffineForOp -> trip count
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	852	for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
				853	AffineMap lbMap = sliceState->lbs[i];
				854	AffineMap ubMap = sliceState->ubs[i];
Nicolas Vasilache	0e7a8a9	2019-01-26 18:41:17	[diff] [blame]	855	if (lbMap == AffineMap() \|\| ubMap == AffineMap()) {
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	856	// The iteration of src loop IV 'i' was not sliced. Use full loop bounds.
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	857	if (srcLoopIVs[i].hasConstantLowerBound() &&
				858	srcLoopIVs[i].hasConstantUpperBound()) {
				859	(*tripCountMap)[srcLoopIVs[i].getInstruction()] =
				860	srcLoopIVs[i].getConstantUpperBound() -
				861	srcLoopIVs[i].getConstantLowerBound();
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	862	continue;
				863	}
				864	return false;
				865	}
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	866	Optional<uint64_t> tripCount = getConstDifference(lbMap, ubMap);
				867	if (!tripCount.hasValue())
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	868	return false;
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	869	(*tripCountMap)[srcLoopIVs[i].getInstruction()] = tripCount.getValue();
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	870	}
				871	return true;
				872	}
				873
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	874	// Removes load operations from 'srcLoads' which operate on 'memref', and
				875	// adds them to 'dstLoads'.
				876	static void
				877	moveLoadsAccessingMemrefTo(Value *memref,
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	878	SmallVectorImpl<Instruction > srcLoads,
				879	SmallVectorImpl<Instruction > dstLoads) {
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	880	dstLoads->clear();
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	881	SmallVector<Instruction *, 4> srcLoadsToKeep;
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	882	for (auto load : srcLoads) {
				883	if (load->cast<LoadOp>()->getMemRef() == memref)
				884	dstLoads->push_back(load);
				885	else
				886	srcLoadsToKeep.push_back(load);
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	887	}
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	888	srcLoads->swap(srcLoadsToKeep);
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	889	}
				890
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	891	// Returns the innermost common loop depth for the set of operations in 'ops'.
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	892	static unsigned getInnermostCommonLoopDepth(ArrayRef<Instruction *> ops) {
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	893	unsigned numOps = ops.size();
				894	assert(numOps > 0);
				895
Chris Lattner	d9b5bc8	2019-03-25 02:53:05	[diff] [blame]	896	std::vector<SmallVector<AffineForOp, 4>> loops(numOps);
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	897	unsigned loopDepthLimit = std::numeric_limits<unsigned>::max();
				898	for (unsigned i = 0; i < numOps; ++i) {
				899	getLoopIVs(*ops[i], &loops[i]);
				900	loopDepthLimit =
				901	std::min(loopDepthLimit, static_cast<unsigned>(loops[i].size()));
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	902	}
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	903
				904	unsigned loopDepth = 0;
				905	for (unsigned d = 0; d < loopDepthLimit; ++d) {
				906	unsigned i;
				907	for (i = 1; i < numOps; ++i) {
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	908	if (loops[i - 1][d] != loops[i][d])
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	909	break;
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	910	}
				911	if (i != numOps)
				912	break;
				913	++loopDepth;
				914	}
				915	return loopDepth;
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	916	}
				917
MLIR Team	d7c8244	2019-01-30 23:53:41	[diff] [blame]	918	// Returns the maximum loop depth at which no dependences between 'loadOpInsts'
				919	// and 'storeOpInsts' are satisfied.
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	920	static unsigned getMaxLoopDepth(ArrayRef<Instruction *> loadOpInsts,
				921	ArrayRef<Instruction *> storeOpInsts) {
MLIR Team	d7c8244	2019-01-30 23:53:41	[diff] [blame]	922	// Merge loads and stores into the same array.
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	923	SmallVector<Instruction *, 2> ops(loadOpInsts.begin(), loadOpInsts.end());
MLIR Team	d7c8244	2019-01-30 23:53:41	[diff] [blame]	924	ops.append(storeOpInsts.begin(), storeOpInsts.end());
				925
				926	// Compute the innermost common loop depth for loads and stores.
				927	unsigned loopDepth = getInnermostCommonLoopDepth(ops);
				928
				929	// Return common loop depth for loads if there are no store ops.
				930	if (storeOpInsts.empty())
				931	return loopDepth;
				932
				933	// Check dependences on all pairs of ops in 'ops' and store the minimum
				934	// loop depth at which a dependence is satisfied.
				935	for (unsigned i = 0, e = ops.size(); i < e; ++i) {
				936	auto *srcOpInst = ops[i];
				937	MemRefAccess srcAccess(srcOpInst);
				938	for (unsigned j = 0; j < e; ++j) {
				939	auto *dstOpInst = ops[j];
				940	MemRefAccess dstAccess(dstOpInst);
				941
				942	unsigned numCommonLoops =
				943	getNumCommonSurroundingLoops(srcOpInst, dstOpInst);
				944	for (unsigned d = 1; d <= numCommonLoops + 1; ++d) {
				945	FlatAffineConstraints dependenceConstraints;
				946	// TODO(andydavis) Cache dependence analysis results, check cache here.
				947	if (checkMemrefAccessDependence(srcAccess, dstAccess, d,
				948	&dependenceConstraints,
				949	/dependenceComponents=/nullptr)) {
				950	// Store minimum loop depth and break because we want the min 'd' at
				951	// which there is a dependence.
				952	loopDepth = std::min(loopDepth, d - 1);
				953	break;
				954	}
				955	}
				956	}
				957	}
				958	return loopDepth;
				959	}
				960
MLIR Team	8f5f2c7	2019-02-15 17:32:18	[diff] [blame]	961	// Compute loop interchange permutation:
				962	// *) Computes dependence components between all op pairs in 'ops' for loop
				963	// depths in range [1, 'maxLoopDepth'].
				964	// *) Classifies the outermost 'maxLoopDepth' loops surrounding 'ops' as either
				965	// parallel or sequential.
				966	// *) Computes the loop permutation which sinks sequential loops deeper into
				967	// the loop nest, while preserving the relative order between other loops.
				968	// *) Checks each dependence component against the permutation to see if the
				969	// desired loop interchange would violated dependences by making the a
				970	// dependence componenent lexicographically negative.
				971	// TODO(andydavis) Move this function to LoopUtils.
				972	static bool
				973	computeLoopInterchangePermutation(ArrayRef<Instruction *> ops,
				974	unsigned maxLoopDepth,
				975	SmallVectorImpl<unsigned> *loopPermMap) {
				976	// Gather dependence components for dependences between all ops in 'ops'
				977	// at loop depths in range [1, maxLoopDepth].
				978	// TODO(andydavis) Refactor this loop into a LoopUtil utility function:
				979	// mlir::getDependenceComponents().
				980	// TODO(andydavis) Split this loop into two: first check all dependences,
				981	// and construct dep vectors. Then, scan through them to detect the parallel
				982	// ones.
				983	std::vector<llvm::SmallVector<DependenceComponent, 2>> depCompsVec;
				984	llvm::SmallVector<bool, 8> isParallelLoop(maxLoopDepth, true);
				985	unsigned numOps = ops.size();
				986	for (unsigned d = 1; d <= maxLoopDepth; ++d) {
				987	for (unsigned i = 0; i < numOps; ++i) {
				988	auto *srcOpInst = ops[i];
				989	MemRefAccess srcAccess(srcOpInst);
				990	for (unsigned j = 0; j < numOps; ++j) {
				991	auto *dstOpInst = ops[j];
				992	MemRefAccess dstAccess(dstOpInst);
				993
				994	FlatAffineConstraints dependenceConstraints;
				995	llvm::SmallVector<DependenceComponent, 2> depComps;
				996	// TODO(andydavis,bondhugula) Explore whether it would be profitable
				997	// to pre-compute and store deps instead of repeatidly checking.
				998	if (checkMemrefAccessDependence(srcAccess, dstAccess, d,
				999	&dependenceConstraints, &depComps)) {
				1000	isParallelLoop[d - 1] = false;
				1001	depCompsVec.push_back(depComps);
				1002	}
				1003	}
				1004	}
				1005	}
				1006	// Count the number of parallel loops.
				1007	unsigned numParallelLoops = 0;
				1008	for (unsigned i = 0, e = isParallelLoop.size(); i < e; ++i)
				1009	if (isParallelLoop[i])
				1010	++numParallelLoops;
				1011
				1012	// Compute permutation of loops that sinks sequential loops (and thus raises
				1013	// parallel loops) while preserving relative order.
				1014	llvm::SmallVector<unsigned, 4> loopPermMapInv;
				1015	loopPermMapInv.resize(maxLoopDepth);
				1016	loopPermMap->resize(maxLoopDepth);
				1017	unsigned nextSequentialLoop = numParallelLoops;
				1018	unsigned nextParallelLoop = 0;
				1019	for (unsigned i = 0; i < maxLoopDepth; ++i) {
				1020	if (isParallelLoop[i]) {
				1021	(*loopPermMap)[i] = nextParallelLoop;
				1022	loopPermMapInv[nextParallelLoop++] = i;
				1023	} else {
				1024	(*loopPermMap)[i] = nextSequentialLoop;
				1025	loopPermMapInv[nextSequentialLoop++] = i;
				1026	}
				1027	}
				1028
				1029	// Check each dependence component against the permutation to see if the
				1030	// desired loop interchange permutation would make the dependence vectors
				1031	// lexicographically negative.
				1032	// Example 1: [-1, 1][0, 0]
				1033	// Example 2: [0, 0][-1, 1]
				1034	for (unsigned i = 0, e = depCompsVec.size(); i < e; ++i) {
				1035	llvm::SmallVector<DependenceComponent, 2> &depComps = depCompsVec[i];
				1036	assert(depComps.size() >= maxLoopDepth);
				1037	// Check if the first non-zero dependence component is positive.
				1038	for (unsigned j = 0; j < maxLoopDepth; ++j) {
				1039	unsigned permIndex = loopPermMapInv[j];
				1040	assert(depComps[permIndex].lb.hasValue());
				1041	int64_t depCompLb = depComps[permIndex].lb.getValue();
				1042	if (depCompLb > 0)
				1043	break;
				1044	if (depCompLb < 0)
				1045	return false;
				1046	}
				1047	}
				1048	return true;
				1049	}
				1050
				1051	// Sinks all sequential loops to the innermost levels (while preserving
				1052	// relative order among them) and moves all parallel loops to the
				1053	// outermost (while again preserving relative order among them).
				1054	// This can increase the loop depth at which we can fuse a slice, since we are
				1055	// pushing loop carried dependence to a greater depth in the loop nest.
				1056	static void sinkSequentialLoops(MemRefDependenceGraph::Node *node) {
				1057	assert(node->inst->isa<AffineForOp>());
				1058	// Get perfectly nested sequence of loops starting at root of loop nest.
				1059	// TODO(andydavis,bondhugula) Share this with similar code in loop tiling.
Chris Lattner	d9b5bc8	2019-03-25 02:53:05	[diff] [blame]	1060	SmallVector<AffineForOp, 4> loops;
				1061	AffineForOp curr = node->inst->cast<AffineForOp>();
MLIR Team	8f5f2c7	2019-02-15 17:32:18	[diff] [blame]	1062	loops.push_back(curr);
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1063	auto *currBody = curr.getBody();
MLIR Team	8f5f2c7	2019-02-15 17:32:18	[diff] [blame]	1064	while (!currBody->empty() &&
				1065	std::next(currBody->begin()) == currBody->end() &&
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1066	(curr = curr.getBody()->front().dyn_cast<AffineForOp>())) {
MLIR Team	8f5f2c7	2019-02-15 17:32:18	[diff] [blame]	1067	loops.push_back(curr);
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1068	currBody = curr.getBody();
MLIR Team	8f5f2c7	2019-02-15 17:32:18	[diff] [blame]	1069	}
				1070	if (loops.size() < 2)
				1071	return;
				1072
				1073	// Merge loads and stores into the same array.
				1074	SmallVector<Instruction *, 2> memOps(node->loads.begin(), node->loads.end());
				1075	memOps.append(node->stores.begin(), node->stores.end());
				1076
				1077	// Compute loop permutation in 'loopPermMap'.
				1078	llvm::SmallVector<unsigned, 4> loopPermMap;
				1079	if (!computeLoopInterchangePermutation(memOps, loops.size(), &loopPermMap))
				1080	return;
				1081
				1082	int loopNestRootIndex = -1;
				1083	for (int i = loops.size() - 1; i >= 0; --i) {
				1084	int permIndex = static_cast<int>(loopPermMap[i]);
				1085	// Store the index of the for loop which will be the new loop nest root.
				1086	if (permIndex == 0)
				1087	loopNestRootIndex = i;
				1088	if (permIndex > i) {
				1089	// Sink loop 'i' by 'permIndex - i' levels deeper into the loop nest.
				1090	sinkLoop(loops[i], permIndex - i);
				1091	}
				1092	}
				1093	assert(loopNestRootIndex != -1 && "invalid root index");
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1094	node->inst = loops[loopNestRootIndex].getInstruction();
MLIR Team	8f5f2c7	2019-02-15 17:32:18	[diff] [blame]	1095	}
				1096
Uday Bondhugula	8be2627	2019-02-02 01:06:22	[diff] [blame]	1097	// TODO(mlir-team): improve/complete this when we have target data.
				1098	unsigned getMemRefEltSizeInBytes(MemRefType memRefType) {
				1099	auto elementType = memRefType.getElementType();
				1100
				1101	unsigned sizeInBits;
				1102	if (elementType.isIntOrFloat()) {
				1103	sizeInBits = elementType.getIntOrFloatBitWidth();
				1104	} else {
				1105	auto vectorType = elementType.cast<VectorType>();
				1106	sizeInBits =
				1107	vectorType.getElementTypeBitWidth() * vectorType.getNumElements();
				1108	}
				1109	return llvm::divideCeil(sizeInBits, 8);
				1110	}
				1111
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1112	// Creates and returns a private (single-user) memref for fused loop rooted
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	1113	// at 'forOp', with (potentially reduced) memref size based on the
Uday Bondhugula	94a03f8	2019-01-22 21:58:52	[diff] [blame]	1114	// MemRefRegion written to by 'srcStoreOpInst' at depth 'dstLoopDepth'.
				1115	// TODO(bondhugula): consider refactoring the common code from generateDma and
				1116	// this one.
Chris Lattner	d9b5bc8	2019-03-25 02:53:05	[diff] [blame]	1117	static Value *createPrivateMemRef(AffineForOp forOp,
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	1118	Instruction *srcStoreOpInst,
Uday Bondhugula	8be2627	2019-02-02 01:06:22	[diff] [blame]	1119	unsigned dstLoopDepth,
				1120	Optional<unsigned> fastMemorySpace,
Uday Bondhugula	d4b3ff1	2019-02-27 00:10:19	[diff] [blame]	1121	uint64_t localBufSizeThreshold) {
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1122	auto *forInst = forOp.getInstruction();
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	1123
				1124	// Create builder to insert alloc op just before 'forOp'.
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1125	FuncBuilder b(forInst);
				1126	// Builder to create constants at the top level.
				1127	FuncBuilder top(forInst->getFunction());
				1128	// Create new memref type based on slice bounds.
				1129	auto *oldMemRef = srcStoreOpInst->cast<StoreOp>()->getMemRef();
				1130	auto oldMemRefType = oldMemRef->getType().cast<MemRefType>();
				1131	unsigned rank = oldMemRefType.getRank();
				1132
Uday Bondhugula	94a03f8	2019-01-22 21:58:52	[diff] [blame]	1133	// Compute MemRefRegion for 'srcStoreOpInst' at depth 'dstLoopDepth'.
Uday Bondhugula	0f50414	2019-02-04 21:48:44	[diff] [blame]	1134	MemRefRegion region(srcStoreOpInst->getLoc());
River Riddle	1e55ae1	2019-03-08 06:14:47	[diff] [blame]	1135	bool validRegion = succeeded(region.compute(srcStoreOpInst, dstLoopDepth));
MLIR Team	d42ef78	2019-03-04 19:01:25	[diff] [blame]	1136	(void)validRegion;
				1137	assert(validRegion && "unexpected memref region failure");
River Riddle	6859f33	2019-01-23 22:39:45	[diff] [blame]	1138	SmallVector<int64_t, 4> newShape;
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1139	std::vector<SmallVector<int64_t, 4>> lbs;
Uday Bondhugula	94a03f8	2019-01-22 21:58:52	[diff] [blame]	1140	SmallVector<int64_t, 8> lbDivisors;
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1141	lbs.reserve(rank);
				1142	// Query 'region' for 'newShape' and lower bounds of MemRefRegion accessed
Uday Bondhugula	94a03f8	2019-01-22 21:58:52	[diff] [blame]	1143	// by 'srcStoreOpInst' at depth 'dstLoopDepth'.
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1144	Optional<int64_t> numElements =
Uday Bondhugula	0f50414	2019-02-04 21:48:44	[diff] [blame]	1145	region.getConstantBoundingSizeAndShape(&newShape, &lbs, &lbDivisors);
Uday Bondhugula	8be2627	2019-02-02 01:06:22	[diff] [blame]	1146	assert(numElements.hasValue() &&
				1147	"non-constant number of elts in local buffer");
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1148
Uday Bondhugula	0f50414	2019-02-04 21:48:44	[diff] [blame]	1149	const FlatAffineConstraints *cst = region.getConstraints();
Uday Bondhugula	94a03f8	2019-01-22 21:58:52	[diff] [blame]	1150	// 'outerIVs' holds the values that this memory region is symbolic/paramteric
				1151	// on; this would correspond to loop IVs surrounding the level at which the
				1152	// slice is being materialized.
				1153	SmallVector<Value *, 8> outerIVs;
				1154	cst->getIdValues(rank, cst->getNumIds(), &outerIVs);
				1155
				1156	// Build 'rank' AffineExprs from MemRefRegion 'lbs'
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1157	SmallVector<AffineExpr, 4> offsets;
				1158	offsets.reserve(rank);
				1159	for (unsigned d = 0; d < rank; ++d) {
Uday Bondhugula	94a03f8	2019-01-22 21:58:52	[diff] [blame]	1160	assert(lbs[d].size() == cst->getNumCols() - rank && "incorrect bound size");
				1161
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1162	AffineExpr offset = top.getAffineConstantExpr(0);
				1163	for (unsigned j = 0, e = cst->getNumCols() - rank - 1; j < e; j++) {
				1164	offset = offset + lbs[d][j] * top.getAffineDimExpr(j);
				1165	}
Uday Bondhugula	94a03f8	2019-01-22 21:58:52	[diff] [blame]	1166	assert(lbDivisors[d] > 0);
				1167	offset =
				1168	(offset + lbs[d][cst->getNumCols() - 1 - rank]).floorDiv(lbDivisors[d]);
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1169	offsets.push_back(offset);
				1170	}
				1171
				1172	// Create 'newMemRefType' using 'newShape' from MemRefRegion accessed
				1173	// by 'srcStoreOpInst'.
Uday Bondhugula	8be2627	2019-02-02 01:06:22	[diff] [blame]	1174	uint64_t bufSize =
				1175	getMemRefEltSizeInBytes(oldMemRefType) * numElements.getValue();
				1176	unsigned newMemSpace;
Uday Bondhugula	d4b3ff1	2019-02-27 00:10:19	[diff] [blame]	1177	if (bufSize <= localBufSizeThreshold && fastMemorySpace.hasValue()) {
Uday Bondhugula	8be2627	2019-02-02 01:06:22	[diff] [blame]	1178	newMemSpace = fastMemorySpace.getValue();
				1179	} else {
				1180	newMemSpace = oldMemRefType.getMemorySpace();
				1181	}
				1182	auto newMemRefType = top.getMemRefType(
				1183	newShape, oldMemRefType.getElementType(), {}, newMemSpace);
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1184	// Gather alloc operands for the dynamic dimensions of the memref.
				1185	SmallVector<Value *, 4> allocOperands;
				1186	unsigned dynamicDimCount = 0;
				1187	for (auto dimSize : oldMemRefType.getShape()) {
				1188	if (dimSize == -1)
				1189	allocOperands.push_back(
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1190	top.create<DimOp>(forOp.getLoc(), oldMemRef, dynamicDimCount++));
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1191	}
				1192
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	1193	// Create new private memref for fused loop 'forOp'.
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	1194	// TODO(andydavis) Create/move alloc ops for private memrefs closer to their
				1195	// consumer loop nests to reduce their live range. Currently they are added
				1196	// at the beginning of the function, because loop nests can be reordered
				1197	// during the fusion pass.
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1198	Value *newMemRef =
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1199	top.create<AllocOp>(forOp.getLoc(), newMemRefType, allocOperands);
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1200
				1201	// Build an AffineMap to remap access functions based on lower bound offsets.
				1202	SmallVector<AffineExpr, 4> remapExprs;
				1203	remapExprs.reserve(rank);
				1204	unsigned zeroOffsetCount = 0;
				1205	for (unsigned i = 0; i < rank; i++) {
				1206	if (auto constExpr = offsets[i].dyn_cast<AffineConstantExpr>())
				1207	if (constExpr.getValue() == 0)
				1208	++zeroOffsetCount;
Uday Bondhugula	94a03f8	2019-01-22 21:58:52	[diff] [blame]	1209	auto dimExpr = b.getAffineDimExpr(outerIVs.size() + i);
				1210
				1211	auto remapExpr =
				1212	simplifyAffineExpr(dimExpr - offsets[i], outerIVs.size() + rank, 0);
				1213	remapExprs.push_back(remapExpr);
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1214	}
Uday Bondhugula	94a03f8	2019-01-22 21:58:52	[diff] [blame]	1215	auto indexRemap =
				1216	zeroOffsetCount == rank
Nicolas Vasilache	0e7a8a9	2019-01-26 18:41:17	[diff] [blame]	1217	? AffineMap()
Uday Bondhugula	94a03f8	2019-01-22 21:58:52	[diff] [blame]	1218	: b.getAffineMap(outerIVs.size() + rank, 0, remapExprs, {});
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1219	// Replace all users of 'oldMemRef' with 'newMemRef'.
Uday Bondhugula	94a03f8	2019-01-22 21:58:52	[diff] [blame]	1220	bool ret =
				1221	replaceAllMemRefUsesWith(oldMemRef, newMemRef, {}, indexRemap,
				1222	/extraOperands=/outerIVs,
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1223	/domInstFilter=/&*forOp.getBody()->begin());
Uday Bondhugula	94a03f8	2019-01-22 21:58:52	[diff] [blame]	1224	assert(ret && "replaceAllMemrefUsesWith should always succeed here");
MLIR Team	71495d5	2019-01-22 21:23:37	[diff] [blame]	1225	(void)ret;
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1226	return newMemRef;
				1227	}
				1228
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1229	// Does the slice have a single iteration?
				1230	static uint64_t getSliceIterationCount(
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	1231	const llvm::SmallDenseMap<Instruction *, uint64_t, 8> &sliceTripCountMap) {
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1232	uint64_t iterCount = 1;
				1233	for (const auto &count : sliceTripCountMap) {
				1234	iterCount *= count.second;
				1235	}
				1236	return iterCount;
				1237	}
				1238
MLIR Team	58aa383	2019-02-16 01:12:19	[diff] [blame]	1239	// Checks if node 'srcId' (which writes to a live out memref), can be safely
				1240	// fused into node 'dstId'. Returns true if the following conditions are met:
				1241	// *) 'srcNode' writes only writes to live out 'memref'.
				1242	// *) 'srcNode' has exaclty one output edge on 'memref' (which is to 'dstId').
				1243	// *) 'dstNode' does write to 'memref'.
				1244	// *) 'dstNode's write region to 'memref' is a super set of 'srcNode's write
				1245	// region to 'memref'.
				1246	// TODO(andydavis) Generalize this to handle more live in/out cases.
				1247	static bool canFuseSrcWhichWritesToLiveOut(unsigned srcId, unsigned dstId,
				1248	Value *memref,
				1249	MemRefDependenceGraph *mdg) {
				1250	auto *srcNode = mdg->getNode(srcId);
				1251	auto *dstNode = mdg->getNode(dstId);
				1252
				1253	// Return false if any of the following are true:
				1254	// *) 'srcNode' writes to a live in/out memref other than 'memref'.
				1255	// *) 'srcNode' has more than one output edge on 'memref'.
				1256	// *) 'dstNode' does not write to 'memref'.
				1257	if (srcNode->getStoreOpCount(memref) != 1 \|\|
				1258	mdg->getOutEdgeCount(srcNode->id, memref) != 1 \|\|
				1259	dstNode->getStoreOpCount(memref) == 0)
				1260	return false;
				1261	// Compute MemRefRegion 'srcWriteRegion' for 'srcStoreOpInst' on 'memref'.
				1262	auto *srcStoreOpInst = srcNode->stores.front();
				1263	MemRefRegion srcWriteRegion(srcStoreOpInst->getLoc());
River Riddle	1e55ae1	2019-03-08 06:14:47	[diff] [blame]	1264	if (failed(srcWriteRegion.compute(srcStoreOpInst, /loopDepth=/0))) {
MLIR Team	d42ef78	2019-03-04 19:01:25	[diff] [blame]	1265	LLVM_DEBUG(llvm::dbgs()
				1266	<< "Unable to compute MemRefRegion for source operation\n.");
				1267	return false;
				1268	}
MLIR Team	58aa383	2019-02-16 01:12:19	[diff] [blame]	1269	SmallVector<int64_t, 4> srcShape;
				1270	// Query 'srcWriteRegion' for 'srcShape' and 'srcNumElements'.
				1271	// by 'srcStoreOpInst' at depth 'dstLoopDepth'.
				1272	Optional<int64_t> srcNumElements =
				1273	srcWriteRegion.getConstantBoundingSizeAndShape(&srcShape);
				1274	if (!srcNumElements.hasValue())
				1275	return false;
				1276
				1277	// Compute MemRefRegion 'dstWriteRegion' for 'dstStoreOpInst' on 'memref'.
				1278	SmallVector<Instruction *, 2> dstStoreOps;
				1279	dstNode->getStoreOpsForMemref(memref, &dstStoreOps);
				1280	assert(dstStoreOps.size() == 1);
				1281	auto *dstStoreOpInst = dstStoreOps[0];
				1282	MemRefRegion dstWriteRegion(dstStoreOpInst->getLoc());
River Riddle	1e55ae1	2019-03-08 06:14:47	[diff] [blame]	1283	if (failed(dstWriteRegion.compute(dstStoreOpInst, /loopDepth=/0))) {
MLIR Team	d42ef78	2019-03-04 19:01:25	[diff] [blame]	1284	LLVM_DEBUG(llvm::dbgs()
				1285	<< "Unable to compute MemRefRegion for dest operation\n.");
				1286	return false;
				1287	}
MLIR Team	58aa383	2019-02-16 01:12:19	[diff] [blame]	1288	SmallVector<int64_t, 4> dstShape;
				1289	// Query 'dstWriteRegion' for 'dstShape' and 'dstNumElements'.
				1290	// by 'dstStoreOpInst' at depth 'dstLoopDepth'.
				1291	Optional<int64_t> dstNumElements =
				1292	dstWriteRegion.getConstantBoundingSizeAndShape(&dstShape);
				1293	if (!dstNumElements.hasValue())
				1294	return false;
				1295
				1296	// Return false if write region is not a superset of 'srcNodes' write
				1297	// region to 'memref'.
				1298	// TODO(andydavis) Check the shape and lower bounds here too.
				1299	if (srcNumElements != dstNumElements)
				1300	return false;
				1301	return true;
				1302	}
				1303
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1304	// Computes the union of all slice bounds computed between 'srcOpInst'
				1305	// and each load op in 'dstLoadOpInsts' at 'dstLoopDepth', and returns
				1306	// the union in 'sliceState'. Returns true on success, false otherwise.
				1307	// TODO(andydavis) Move this to a loop fusion utility function.
				1308	static bool getSliceUnion(Instruction *srcOpInst,
				1309	ArrayRef<Instruction *> dstLoadOpInsts,
				1310	unsigned numSrcLoopIVs, unsigned dstLoopDepth,
				1311	ComputationSliceState *sliceState) {
				1312	MemRefAccess srcAccess(srcOpInst);
				1313	unsigned numDstLoadOpInsts = dstLoadOpInsts.size();
				1314	assert(numDstLoadOpInsts > 0);
				1315	// Compute the slice bounds between 'srcOpInst' and 'dstLoadOpInsts[0]'.
River Riddle	1e55ae1	2019-03-08 06:14:47	[diff] [blame]	1316	if (failed(mlir::getBackwardComputationSliceState(
				1317	srcAccess, MemRefAccess(dstLoadOpInsts[0]), dstLoopDepth,
				1318	sliceState)))
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1319	return false;
				1320	// Handle the common case of one dst load without a copy.
				1321	if (numDstLoadOpInsts == 1)
				1322	return true;
				1323
				1324	// Initialize 'sliceUnionCst' with the bounds computed in previous step.
				1325	FlatAffineConstraints sliceUnionCst;
River Riddle	1e55ae1	2019-03-08 06:14:47	[diff] [blame]	1326	if (failed(sliceState->getAsConstraints(&sliceUnionCst))) {
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1327	LLVM_DEBUG(llvm::dbgs() << "Unable to compute slice bound constraints\n.");
				1328	return false;
				1329	}
				1330
				1331	// Compute the union of slice bounds between 'srcOpInst' and each load
				1332	// in 'dstLoadOpInsts' in range [1, numDstLoadOpInsts), in 'sliceUnionCst'.
				1333	for (unsigned i = 1; i < numDstLoadOpInsts; ++i) {
				1334	MemRefAccess dstAccess(dstLoadOpInsts[i]);
				1335	// Compute slice bounds for 'srcOpInst' and 'dstLoadOpInsts[i]'.
				1336	ComputationSliceState tmpSliceState;
River Riddle	1e55ae1	2019-03-08 06:14:47	[diff] [blame]	1337	if (failed(mlir::getBackwardComputationSliceState(
				1338	srcAccess, dstAccess, dstLoopDepth, &tmpSliceState))) {
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1339	LLVM_DEBUG(llvm::dbgs() << "Unable to compute slice bounds\n.");
				1340	return false;
				1341	}
				1342
				1343	// Compute constraints for 'tmpSliceState' in 'tmpSliceCst'.
				1344	FlatAffineConstraints tmpSliceCst;
River Riddle	1e55ae1	2019-03-08 06:14:47	[diff] [blame]	1345	if (failed(tmpSliceState.getAsConstraints(&tmpSliceCst))) {
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1346	LLVM_DEBUG(llvm::dbgs()
				1347	<< "Unable to compute slice bound constraints\n.");
				1348	return false;
				1349	}
				1350	// Compute union bounding box of 'sliceUnionCst' and 'tmpSliceCst'.
River Riddle	1e55ae1	2019-03-08 06:14:47	[diff] [blame]	1351	if (failed(sliceUnionCst.unionBoundingBox(tmpSliceCst))) {
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1352	LLVM_DEBUG(llvm::dbgs()
				1353	<< "Unable to compute union bounding box of slice bounds.\n.");
				1354	return false;
				1355	}
				1356	}
				1357
				1358	// Convert any dst loop IVs which are symbol identifiers to dim identifiers.
				1359	sliceUnionCst.convertLoopIVSymbolsToDims();
				1360
				1361	sliceState->clearBounds();
				1362	sliceState->lbs.resize(numSrcLoopIVs, AffineMap());
				1363	sliceState->ubs.resize(numSrcLoopIVs, AffineMap());
				1364
				1365	// Get slice bounds from slice union constraints 'sliceUnionCst'.
				1366	sliceUnionCst.getSliceBounds(numSrcLoopIVs, srcOpInst->getContext(),
				1367	&sliceState->lbs, &sliceState->ubs);
				1368	// Add slice bound operands of union.
				1369	SmallVector<Value *, 4> sliceBoundOperands;
				1370	sliceUnionCst.getIdValues(numSrcLoopIVs,
				1371	sliceUnionCst.getNumDimAndSymbolIds(),
				1372	&sliceBoundOperands);
				1373	// Give each bound its own copy of 'sliceBoundOperands' for subsequent
				1374	// canonicalization.
				1375	sliceState->lbOperands.resize(numSrcLoopIVs, sliceBoundOperands);
				1376	sliceState->ubOperands.resize(numSrcLoopIVs, sliceBoundOperands);
				1377	return true;
				1378	}
				1379
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1380	// Checks the profitability of fusing a backwards slice of the loop nest
MLIR Team	d7c8244	2019-01-30 23:53:41	[diff] [blame]	1381	// surrounding 'srcOpInst' into the loop nest surrounding 'dstLoadOpInsts'.
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1382	// The argument 'srcStoreOpInst' is used to calculate the storage reduction on
				1383	// the memref being produced and consumed, which is an input to the cost model.
				1384	// For producer-consumer fusion, 'srcStoreOpInst' will be the same as
				1385	// 'srcOpInst', as we are slicing w.r.t. that producer.
				1386	// For input-reuse fusion, 'srcOpInst' will be the src loop nest LoadOp which
				1387	// reads from the same memref as dst loop nest load ops, and 'srcStoreOpInst'
				1388	// will be the unique store op in the src node, which will be used to check
				1389	// that the write region is the same after input-reuse fusion.
Uday Bondhugula	b4a1443	2019-01-26 00:00:50	[diff] [blame]	1390	// Returns true if it is profitable to fuse the candidate loop nests. Returns
				1391	// false otherwise. `dstLoopDepth` is set to the most profitable depth at which
				1392	// to materialize the source loop nest slice.
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1393	// The profitability model executes the following steps:
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1394	// *) Computes the backward computation slice at 'srcOpInst'. This
				1395	// computation slice of the loop nest surrounding 'srcOpInst' is
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1396	// represented by modified src loop bounds in 'sliceState', which are
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1397	// functions of loop IVs in the loop nest surrounding 'srcOpInst'.
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1398	// *) Computes the cost of unfused src/dst loop nests (currently the cost of a
				1399	// loop nest is the total number of dynamic operation instances in the loop
				1400	// nest).
				1401	// *) Computes the cost of fusing a slice of the src loop nest into the dst
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1402	// loop nest at various values of dst loop depth, attempting to fuse
				1403	// the largest computation slice at the maximal dst loop depth (closest to the
				1404	// load) to minimize reuse distance and potentially enable subsequent
				1405	// load/store forwarding.
MLIR Team	d7c8244	2019-01-30 23:53:41	[diff] [blame]	1406	// NOTE: If the dst loop nest includes multiple loads in 'dstLoadOpInsts' for
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1407	// the same memref as is written by 'srcOpInst', then the union of slice
				1408	// loop bounds is used to compute the slice and associated slice cost.
Uday Bondhugula	b4a1443	2019-01-26 00:00:50	[diff] [blame]	1409	// NOTE: 'dstLoopDepth' refers to the loop depth within the destination loop
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1410	// nest, at which the src computation slice is inserted/fused.
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1411	// NOTE: We attempt to maximize the dst loop depth, but there are cases
				1412	// where a particular setting for 'dstLoopNest' might fuse an unsliced
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1413	// loop (within the src computation slice) at a depth which results in
				1414	// excessive recomputation (see unit tests for examples).
				1415	// *) Compares the total cost of the unfused loop nests to the min cost fused
				1416	// loop nest computed in the previous step, and returns true if the latter
				1417	// is lower.
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	1418	static bool isFusionProfitable(Instruction *srcOpInst,
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1419	Instruction *srcStoreOpInst,
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	1420	ArrayRef<Instruction *> dstLoadOpInsts,
				1421	ArrayRef<Instruction *> dstStoreOpInsts,
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1422	ComputationSliceState *sliceState,
Uday Bondhugula	ce7e5953	2019-03-08 17:21:52	[diff] [blame]	1423	unsigned *dstLoopDepth, bool maximalFusion) {
Uday Bondhugula	06d21d9	2019-01-25 01:01:49	[diff] [blame]	1424	LLVM_DEBUG({
				1425	llvm::dbgs() << "Checking whether fusion is profitable between:\n";
Uday Bondhugula	a1dad3a	2019-02-20 02:17:19	[diff] [blame]	1426	llvm::dbgs() << " " << *srcOpInst << " and \n";
MLIR Team	d7c8244	2019-01-30 23:53:41	[diff] [blame]	1427	for (auto dstOpInst : dstLoadOpInsts) {
Uday Bondhugula	a1dad3a	2019-02-20 02:17:19	[diff] [blame]	1428	llvm::dbgs() << " " << *dstOpInst << "\n";
Uday Bondhugula	06d21d9	2019-01-25 01:01:49	[diff] [blame]	1429	};
				1430	});
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1431
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1432	// Compute cost of sliced and unsliced src loop nest.
Chris Lattner	d9b5bc8	2019-03-25 02:53:05	[diff] [blame]	1433	SmallVector<AffineForOp, 4> srcLoopIVs;
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1434	getLoopIVs(*srcOpInst, &srcLoopIVs);
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1435	unsigned numSrcLoopIVs = srcLoopIVs.size();
				1436
				1437	// Walk src loop nest and collect stats.
				1438	LoopNestStats srcLoopNestStats;
				1439	LoopNestStatsCollector srcStatsCollector(&srcLoopNestStats);
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1440	srcStatsCollector.collect(srcLoopIVs[0].getInstruction());
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1441	// Currently only constant trip count loop nests are supported.
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1442	if (srcStatsCollector.hasLoopWithNonConstTripCount) {
				1443	LLVM_DEBUG(llvm::dbgs() << "Non-constant trip count loops unsupported.\n");
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1444	return false;
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1445	}
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1446	// Compute cost of dst loop nest.
Chris Lattner	d9b5bc8	2019-03-25 02:53:05	[diff] [blame]	1447	SmallVector<AffineForOp, 4> dstLoopIVs;
MLIR Team	d7c8244	2019-01-30 23:53:41	[diff] [blame]	1448	getLoopIVs(*dstLoadOpInsts[0], &dstLoopIVs);
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1449
				1450	LoopNestStats dstLoopNestStats;
				1451	LoopNestStatsCollector dstStatsCollector(&dstLoopNestStats);
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1452	dstStatsCollector.collect(dstLoopIVs[0].getInstruction());
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1453	// Currently only constant trip count loop nests are supported.
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1454	if (dstStatsCollector.hasLoopWithNonConstTripCount) {
				1455	LLVM_DEBUG(llvm::dbgs() << "Non-constant trip count loops unsupported.\n");
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1456	return false;
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1457	}
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1458
MLIR Team	d7c8244	2019-01-30 23:53:41	[diff] [blame]	1459	// Compute the maximum loop depth at which we can can insert the src slice
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1460	// and still satisfy dest loop nest dependences, for producer-consumer fusion.
				1461	unsigned maxDstLoopDepth =
				1462	(srcOpInst == srcStoreOpInst)
				1463	? getMaxLoopDepth(dstLoadOpInsts, dstStoreOpInsts)
				1464	: dstLoopIVs.size();
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1465	if (maxDstLoopDepth == 0) {
				1466	LLVM_DEBUG(llvm::dbgs() << "Can't fuse: maxDstLoopDepth == 0 .\n");
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1467	return false;
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1468	}
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1469
				1470	// Search for min cost value for 'dstLoopDepth'. At each value of
				1471	// 'dstLoopDepth' from 'maxDstLoopDepth' to '1', compute computation slice
				1472	// bounds between 'srcOpInst' and each op in 'dstOpinsts' (taking the union
				1473	// of these bounds). Next the union slice bounds are used to calculate
				1474	// the cost of the slice and the cost of the slice inserted into the dst
				1475	// loop nest at 'dstLoopDepth'.
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1476	uint64_t minFusedLoopNestComputeCost = std::numeric_limits<uint64_t>::max();
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1477	double maxStorageReduction = 0.0;
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1478	Optional<uint64_t> sliceMemEstimate = None;
				1479
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1480	SmallVector<ComputationSliceState, 4> sliceStates;
				1481	sliceStates.resize(maxDstLoopDepth);
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1482	// The best loop depth at which to materialize the slice.
				1483	Optional<unsigned> bestDstLoopDepth = None;
				1484
				1485	// Compute op instance count for the src loop nest without iteration slicing.
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	1486	uint64_t srcLoopNestCost =
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1487	getComputeCost(srcLoopIVs[0].getInstruction(), &srcLoopNestStats,
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	1488	/tripCountOverrideMap=/nullptr,
				1489	/computeCostMap=/nullptr);
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1490
MLIR Team	b9dde91	2019-02-06 19:01:10	[diff] [blame]	1491	// Compute src loop nest write region size.
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1492	MemRefRegion srcWriteRegion(srcStoreOpInst->getLoc());
River Riddle	1e55ae1	2019-03-08 06:14:47	[diff] [blame]	1493	if (failed(srcWriteRegion.compute(srcStoreOpInst, /loopDepth=/0))) {
MLIR Team	d42ef78	2019-03-04 19:01:25	[diff] [blame]	1494	LLVM_DEBUG(llvm::dbgs()
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1495	<< "Unable to compute MemRefRegion for source instruction\n.");
MLIR Team	d42ef78	2019-03-04 19:01:25	[diff] [blame]	1496	return false;
				1497	}
				1498
MLIR Team	b9dde91	2019-02-06 19:01:10	[diff] [blame]	1499	Optional<int64_t> maybeSrcWriteRegionSizeBytes =
				1500	srcWriteRegion.getRegionSize();
				1501	if (!maybeSrcWriteRegionSizeBytes.hasValue())
				1502	return false;
				1503	int64_t srcWriteRegionSizeBytes = maybeSrcWriteRegionSizeBytes.getValue();
				1504
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1505	// Compute op instance count for the src loop nest.
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	1506	uint64_t dstLoopNestCost =
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1507	getComputeCost(dstLoopIVs[0].getInstruction(), &dstLoopNestStats,
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	1508	/tripCountOverrideMap=/nullptr,
				1509	/computeCostMap=/nullptr);
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1510
MLIR Team	b9dde91	2019-02-06 19:01:10	[diff] [blame]	1511	// Evaluate all depth choices for materializing the slice in the destination
				1512	// loop nest.
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	1513	llvm::SmallDenseMap<Instruction *, uint64_t, 8> sliceTripCountMap;
				1514	DenseMap<Instruction *, int64_t> computeCostMap;
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1515	for (unsigned i = maxDstLoopDepth; i >= 1; --i) {
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1516	// Compute the union of slice bounds of all ops in 'dstLoadOpInsts'.
				1517	if (!getSliceUnion(srcOpInst, dstLoadOpInsts, numSrcLoopIVs, i,
				1518	&sliceStates[i - 1])) {
				1519	LLVM_DEBUG(llvm::dbgs()
				1520	<< "getSliceUnion failed for loopDepth: " << i << "\n");
				1521	continue;
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1522	}
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1523
Uday Bondhugula	b4a1443	2019-01-26 00:00:50	[diff] [blame]	1524	// Build trip count map for computation slice. We'll skip cases where the
				1525	// trip count was non-constant.
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1526	sliceTripCountMap.clear();
				1527	if (!buildSliceTripCountMap(srcOpInst, &sliceStates[i - 1],
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1528	&sliceTripCountMap)) {
				1529	LLVM_DEBUG(llvm::dbgs() << "Unable to build slice trip count map.\n.");
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1530	continue;
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1531	}
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1532
				1533	// Checks whether a store to load forwarding will happen.
				1534	int64_t sliceIterationCount = getSliceIterationCount(sliceTripCountMap);
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1535	assert(sliceIterationCount > 0);
Uday Bondhugula	b4a1443	2019-01-26 00:00:50	[diff] [blame]	1536	bool storeLoadFwdGuaranteed = (sliceIterationCount == 1);
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1537
				1538	// Compute cost of fusion for this dest loop depth.
				1539
				1540	computeCostMap.clear();
				1541
				1542	// The store and loads to this memref will disappear.
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1543	// TODO(andydavis) Add load coalescing to memref data flow opt pass.
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1544	if (storeLoadFwdGuaranteed) {
				1545	// A single store disappears: -1 for that.
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1546	computeCostMap[srcLoopIVs[numSrcLoopIVs - 1].getInstruction()] = -1;
MLIR Team	d7c8244	2019-01-30 23:53:41	[diff] [blame]	1547	for (auto *loadOp : dstLoadOpInsts) {
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	1548	auto *parentInst = loadOp->getParentInst();
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	1549	if (parentInst && parentInst->isa<AffineForOp>())
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	1550	computeCostMap[parentInst] = -1;
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1551	}
				1552	}
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1553
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1554	// Compute op instance count for the src loop nest with iteration slicing.
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1555	int64_t sliceComputeCost =
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1556	getComputeCost(srcLoopIVs[0].getInstruction(), &srcLoopNestStats,
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1557	/tripCountOverrideMap=/&sliceTripCountMap,
				1558	/computeCostMap=/&computeCostMap);
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1559
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1560	// Compute cost of fusion for this depth.
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1561	computeCostMap[dstLoopIVs[i - 1].getInstruction()] = sliceComputeCost;
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1562
				1563	int64_t fusedLoopNestComputeCost =
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1564	getComputeCost(dstLoopIVs[0].getInstruction(), &dstLoopNestStats,
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1565	/tripCountOverrideMap=/nullptr, &computeCostMap);
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1566
				1567	double additionalComputeFraction =
				1568	fusedLoopNestComputeCost /
				1569	(static_cast<double>(srcLoopNestCost) + dstLoopNestCost) -
				1570	1;
				1571
MLIR Team	b9dde91	2019-02-06 19:01:10	[diff] [blame]	1572	// Compute what the slice write MemRefRegion would be, if the src loop
				1573	// nest slice 'sliceStates[i - 1]' were to be inserted into the dst loop
				1574	// nest at loop depth 'i'
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1575	MemRefRegion sliceWriteRegion(srcStoreOpInst->getLoc());
River Riddle	1e55ae1	2019-03-08 06:14:47	[diff] [blame]	1576	if (failed(sliceWriteRegion.compute(srcStoreOpInst, /loopDepth=/0,
				1577	&sliceStates[i - 1]))) {
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1578	LLVM_DEBUG(llvm::dbgs()
				1579	<< "Failed to compute slice write region at loopDepth: " << i
				1580	<< "\n");
MLIR Team	d42ef78	2019-03-04 19:01:25	[diff] [blame]	1581	continue;
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1582	}
MLIR Team	d42ef78	2019-03-04 19:01:25	[diff] [blame]	1583
MLIR Team	b9dde91	2019-02-06 19:01:10	[diff] [blame]	1584	Optional<int64_t> maybeSliceWriteRegionSizeBytes =
				1585	sliceWriteRegion.getRegionSize();
				1586	if (!maybeSliceWriteRegionSizeBytes.hasValue() \|\|
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1587	maybeSliceWriteRegionSizeBytes.getValue() == 0) {
				1588	LLVM_DEBUG(llvm::dbgs()
				1589	<< "Failed to get slice write region size at loopDepth: " << i
				1590	<< "\n");
MLIR Team	b9dde91	2019-02-06 19:01:10	[diff] [blame]	1591	continue;
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1592	}
MLIR Team	b9dde91	2019-02-06 19:01:10	[diff] [blame]	1593	int64_t sliceWriteRegionSizeBytes =
				1594	maybeSliceWriteRegionSizeBytes.getValue();
				1595
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1596	// If we are fusing for reuse, check that write regions remain the same.
				1597	// TODO(andydavis) Write region check should check sizes and offsets in
				1598	// each dimension, so that we are sure they are covering the same memref
				1599	// region. Also, move this out to a isMemRefRegionSuperSet helper function.
				1600	if (srcOpInst != srcStoreOpInst &&
				1601	sliceWriteRegionSizeBytes != srcWriteRegionSizeBytes)
				1602	continue;
				1603
MLIR Team	b9dde91	2019-02-06 19:01:10	[diff] [blame]	1604	double storageReduction = static_cast<double>(srcWriteRegionSizeBytes) /
				1605	static_cast<double>(sliceWriteRegionSizeBytes);
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1606
Uday Bondhugula	06d21d9	2019-01-25 01:01:49	[diff] [blame]	1607	LLVM_DEBUG({
				1608	std::stringstream msg;
				1609	msg << " evaluating fusion profitability at depth : " << i << "\n"
Uday Bondhugula	d4b3ff1	2019-02-27 00:10:19	[diff] [blame]	1610	<< std::fixed << std::setprecision(2)
				1611	<< " additional compute fraction: "
Uday Bondhugula	06d21d9	2019-01-25 01:01:49	[diff] [blame]	1612	<< 100.0 * additionalComputeFraction << "%\n"
				1613	<< " storage reduction factor: " << storageReduction << "x\n"
				1614	<< " fused nest cost: " << fusedLoopNestComputeCost << "\n"
Uday Bondhugula	a1dad3a	2019-02-20 02:17:19	[diff] [blame]	1615	<< " slice iteration count: " << sliceIterationCount << "\n"
				1616	<< " src write region size: " << srcWriteRegionSizeBytes << "\n"
				1617	<< " slice write region size: " << sliceWriteRegionSizeBytes
				1618	<< "\n";
Uday Bondhugula	06d21d9	2019-01-25 01:01:49	[diff] [blame]	1619	llvm::dbgs() << msg.str();
				1620	});
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1621
				1622	double computeToleranceThreshold =
				1623	clFusionAddlComputeTolerance.getNumOccurrences() > 0
				1624	? clFusionAddlComputeTolerance
				1625	: LoopFusion::kComputeToleranceThreshold;
				1626
				1627	// TODO(b/123247369): This is a placeholder cost model.
				1628	// Among all choices that add an acceptable amount of redundant computation
				1629	// (as per computeToleranceThreshold), we will simply pick the one that
				1630	// reduces the intermediary size the most.
				1631	if ((storageReduction > maxStorageReduction) &&
Uday Bondhugula	ce7e5953	2019-03-08 17:21:52	[diff] [blame]	1632	(maximalFusion \|\|
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1633	(additionalComputeFraction < computeToleranceThreshold))) {
				1634	maxStorageReduction = storageReduction;
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1635	bestDstLoopDepth = i;
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1636	minFusedLoopNestComputeCost = fusedLoopNestComputeCost;
MLIR Team	b9dde91	2019-02-06 19:01:10	[diff] [blame]	1637	sliceMemEstimate = sliceWriteRegionSizeBytes;
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1638	}
				1639	}
				1640
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1641	// A simple cost model: fuse if it reduces the memory footprint. If
				1642	// -maximal-fusion is set, fuse nevertheless.
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1643
Uday Bondhugula	ce7e5953	2019-03-08 17:21:52	[diff] [blame]	1644	if (!maximalFusion && !bestDstLoopDepth.hasValue()) {
Uday Bondhugula	a1dad3a	2019-02-20 02:17:19	[diff] [blame]	1645	LLVM_DEBUG(
				1646	llvm::dbgs()
				1647	<< "All fusion choices involve more than the threshold amount of "
				1648	"redundant computation; NOT fusing.\n");
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1649	return false;
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1650	}
				1651
MLIR Team	d42ef78	2019-03-04 19:01:25	[diff] [blame]	1652	if (!bestDstLoopDepth.hasValue()) {
				1653	LLVM_DEBUG(llvm::dbgs() << "no fusion depth could be evaluated.\n");
				1654	return false;
				1655	}
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1656
				1657	// Set dstLoopDepth based on best values from search.
				1658	*dstLoopDepth = bestDstLoopDepth.getValue();
				1659
				1660	LLVM_DEBUG(
Uday Bondhugula	06d21d9	2019-01-25 01:01:49	[diff] [blame]	1661	llvm::dbgs() << " LoopFusion fusion stats:"
				1662	<< "\n best loop depth: " << bestDstLoopDepth
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1663	<< "\n src loop nest compute cost: " << srcLoopNestCost
				1664	<< "\n dst loop nest compute cost: " << dstLoopNestCost
				1665	<< "\n fused loop nest compute cost: "
				1666	<< minFusedLoopNestComputeCost << "\n");
				1667
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	1668	auto dstMemSize = getMemoryFootprintBytes(dstLoopIVs[0]);
				1669	auto srcMemSize = getMemoryFootprintBytes(srcLoopIVs[0]);
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1670
				1671	Optional<double> storageReduction = None;
				1672
Uday Bondhugula	ce7e5953	2019-03-08 17:21:52	[diff] [blame]	1673	if (!maximalFusion) {
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1674	if (!dstMemSize.hasValue() \|\| !srcMemSize.hasValue()) {
				1675	LLVM_DEBUG(
				1676	llvm::dbgs()
				1677	<< " fusion memory benefit cannot be evaluated; NOT fusing.\n");
				1678	return false;
				1679	}
				1680
				1681	auto srcMemSizeVal = srcMemSize.getValue();
				1682	auto dstMemSizeVal = dstMemSize.getValue();
				1683
				1684	assert(sliceMemEstimate.hasValue() && "expected value");
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1685	auto fusedMem = dstMemSizeVal + sliceMemEstimate.getValue();
				1686
				1687	LLVM_DEBUG(llvm::dbgs() << " src mem: " << srcMemSizeVal << "\n"
				1688	<< " dst mem: " << dstMemSizeVal << "\n"
				1689	<< " fused mem: " << fusedMem << "\n"
				1690	<< " slice mem: " << sliceMemEstimate << "\n");
				1691
				1692	if (fusedMem > srcMemSizeVal + dstMemSizeVal) {
				1693	LLVM_DEBUG(llvm::dbgs() << "Fusion is not profitable; NOT fusing.\n");
				1694	return false;
				1695	}
				1696	storageReduction =
				1697	100.0 *
				1698	(1.0 - fusedMem / (static_cast<double>(srcMemSizeVal) + dstMemSizeVal));
				1699	}
				1700
				1701	double additionalComputeFraction =
				1702	100.0 * (minFusedLoopNestComputeCost /
				1703	(static_cast<double>(srcLoopNestCost) + dstLoopNestCost) -
				1704	1);
MLIR Team	5c5739d	2019-01-25 06:27:40	[diff] [blame]	1705	(void)additionalComputeFraction;
Uday Bondhugula	06d21d9	2019-01-25 01:01:49	[diff] [blame]	1706	LLVM_DEBUG({
				1707	std::stringstream msg;
				1708	msg << " fusion is most profitable at depth " << *dstLoopDepth << " with "
MLIR Team	8564b27	2019-02-22 15:48:59	[diff] [blame]	1709	<< std::setprecision(2) << additionalComputeFraction
Uday Bondhugula	06d21d9	2019-01-25 01:01:49	[diff] [blame]	1710	<< "% redundant computation and a ";
				1711	msg << (storageReduction.hasValue()
				1712	? std::to_string(storageReduction.getValue())
				1713	: "<unknown>");
				1714	msg << "% storage reduction.\n";
				1715	llvm::dbgs() << msg.str();
				1716	});
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1717
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1718	// Update return parameter 'sliceState' with 'bestSliceState'.
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1719	ComputationSliceState bestSliceState = &sliceStates[dstLoopDepth - 1];
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1720	sliceState->lbs = bestSliceState->lbs;
				1721	sliceState->ubs = bestSliceState->ubs;
				1722	sliceState->lbOperands = bestSliceState->lbOperands;
				1723	sliceState->ubOperands = bestSliceState->ubOperands;
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1724
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1725	// Canonicalize slice bound affine maps.
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1726	for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
Nicolas Vasilache	0e7a8a9	2019-01-26 18:41:17	[diff] [blame]	1727	if (sliceState->lbs[i] != AffineMap()) {
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1728	canonicalizeMapAndOperands(&sliceState->lbs[i],
				1729	&sliceState->lbOperands[i]);
				1730	}
Nicolas Vasilache	0e7a8a9	2019-01-26 18:41:17	[diff] [blame]	1731	if (sliceState->ubs[i] != AffineMap()) {
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1732	canonicalizeMapAndOperands(&sliceState->ubs[i],
				1733	&sliceState->ubOperands[i]);
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1734	}
				1735	}
				1736	return true;
				1737	}
				1738
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1739	// GreedyFusion greedily fuses loop nests which have a producer/consumer or
				1740	// input-reuse relationship on a memref, with the goal of improving locality.
MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	1741	//
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1742	// The steps of the producer-consumer fusion algorithm are as follows:
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	1743	//
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1744	// *) A worklist is initialized with node ids from the dependence graph.
				1745	// *) For each node id in the worklist:
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	1746	// *) Pop an AffineForOp off the worklist. This 'dstAffineForOp' will be a
				1747	// candidate destination AffineForOp into which fusion will be attempted.
				1748	// *) Add each LoadOp currently in 'dstAffineForOp' into list 'dstLoadOps'.
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	1749	// *) For each LoadOp in 'dstLoadOps' do:
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1750	// *) Lookup dependent loop nests which have a single store op to the same
				1751	// memref.
				1752	// *) Check if dependences would be violated by the fusion.
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1753	// *) Get a computation slice of 'srcLoopNest', which adjusts its loop
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	1754	// bounds to be functions of 'dstLoopNest' IVs and symbols.
				1755	// *) Fuse the 'srcLoopNest' computation slice into the 'dstLoopNest',
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1756	// at a loop depth determined by the cost model in 'isFusionProfitable'.
Chris Lattner	456ad6a	2018-12-29 00:05:35	[diff] [blame]	1757	// *) Add the newly fused load/store operation instructions to the state,
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	1758	// and also add newly fuse load ops to 'dstLoopOps' to be considered
				1759	// as fusion dst load ops in another iteration.
				1760	// *) Remove old src loop nest and its associated state.
				1761	//
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1762	// The steps of the input-reuse fusion algorithm are as follows:
				1763	//
				1764	// *) Initialize 'worklist' with node ids from the dependence graph.
				1765	// *) For each 'dstNode' in the worklist:
				1766	// *) Find a candidate sibling node 'sibNode' to fuse with 'dstNode' which
				1767	// loads from the same memref, but which has no dependence paths to/from.
				1768	// *) Get a computation slice of 'sibLoopNest', which adjusts its loop
				1769	// bounds to be functions of 'dstLoopNest' IVs and symbols.
				1770	// *) Fuse the 'sibLoopNest' computation slice into the 'dstLoopNest',
				1771	// at a loop depth determined by the cost model in 'isFusionProfitable'.
				1772	// This function also checks that the memref write region of 'sibLoopNest',
				1773	// is preserved in the fused loop nest.
				1774	// *) Update graph state to reflect the fusion of 'sibNode' into 'dstNode'.
				1775	//
Chris Lattner	456ad6a	2018-12-29 00:05:35	[diff] [blame]	1776	// Given a graph where top-level instructions are vertices in the set 'V' and
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	1777	// edges in the set 'E' are dependences between vertices, this algorithm
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1778	// takes O(V) time for initialization, and has runtime O(V + E).
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	1779	//
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1780	// This greedy algorithm is not 'maximal' due to the current restriction of
				1781	// fusing along single producer consumer edges, but there is a TODO to fix this.
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	1782	//
				1783	// TODO(andydavis) Experiment with other fusion policies.
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1784	struct GreedyFusion {
				1785	public:
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1786	// The data dependence graph to traverse during fusion.
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1787	MemRefDependenceGraph *mdg;
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1788	// Worklist of graph nodes visited during the fusion pass.
MLIR Team	a78edcd	2019-02-05 14:57:08	[diff] [blame]	1789	SmallVector<unsigned, 8> worklist;
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1790	// Set of graph nodes which are present on the worklist.
MLIR Team	a78edcd	2019-02-05 14:57:08	[diff] [blame]	1791	llvm::SmallDenseSet<unsigned, 16> worklistSet;
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1792	// Parameter for local buffer size threshold.
				1793	unsigned localBufSizeThreshold;
				1794	// Parameter for fast memory space.
				1795	Optional<unsigned> fastMemorySpace;
Uday Bondhugula	ce7e5953	2019-03-08 17:21:52	[diff] [blame]	1796	// If true, ignore any additional (redundant) computation tolerance threshold
				1797	// that would have prevented fusion.
				1798	bool maximalFusion;
MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	1799
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1800	using Node = MemRefDependenceGraph::Node;
				1801
				1802	GreedyFusion(MemRefDependenceGraph *mdg, unsigned localBufSizeThreshold,
Uday Bondhugula	ce7e5953	2019-03-08 17:21:52	[diff] [blame]	1803	Optional<unsigned> fastMemorySpace, bool maximalFusion)
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1804	: mdg(mdg), localBufSizeThreshold(localBufSizeThreshold),
Uday Bondhugula	ce7e5953	2019-03-08 17:21:52	[diff] [blame]	1805	fastMemorySpace(fastMemorySpace), maximalFusion(maximalFusion) {}
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1806
				1807	// Initializes 'worklist' with nodes from 'mdg'
				1808	void init() {
MLIR Team	a78edcd	2019-02-05 14:57:08	[diff] [blame]	1809	// TODO(andydavis) Add a priority queue for prioritizing nodes by different
				1810	// metrics (e.g. arithmetic intensity/flops-to-bytes ratio).
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1811	worklist.clear();
				1812	worklistSet.clear();
				1813	for (auto &idAndNode : mdg->nodes) {
				1814	const Node &node = idAndNode.second;
				1815	worklist.push_back(node.id);
				1816	worklistSet.insert(node.id);
				1817	}
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1818	}
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	1819
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1820	// Run the GreedyFusion pass.
				1821	// *) First pass through the nodes fuses single-use producer nodes into their
				1822	// unique consumer.
				1823	// *) Second pass fuses sibling nodes which share no dependence edges.
				1824	// *) Third pass fuses any remaining producer nodes into their users.
				1825	void run() {
MLIR Team	c1ff9e8	2019-03-06 04:33:30	[diff] [blame]	1826	// TODO(andydavis) Run this repeatedly until a fixed-point is reached.
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1827	fuseProducerConsumerNodes(/maxSrcUserCount=/1);
				1828	fuseSiblingNodes();
				1829	fuseProducerConsumerNodes(
				1830	/maxSrcUserCount=/std::numeric_limits<unsigned>::max());
				1831	eraseUnusedMemRefAllocations();
				1832	}
				1833
				1834	void fuseProducerConsumerNodes(unsigned maxSrcUserCount) {
				1835	init();
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	1836	while (!worklist.empty()) {
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1837	unsigned dstId = worklist.back();
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	1838	worklist.pop_back();
MLIR Team	a78edcd	2019-02-05 14:57:08	[diff] [blame]	1839	worklistSet.erase(dstId);
				1840
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1841	// Skip if this node was removed (fused into another node).
				1842	if (mdg->nodes.count(dstId) == 0)
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	1843	continue;
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1844	// Get 'dstNode' into which to attempt fusion.
				1845	auto *dstNode = mdg->getNode(dstId);
				1846	// Skip if 'dstNode' is not a loop nest.
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	1847	if (!dstNode->inst->isa<AffineForOp>())
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	1848	continue;
MLIR Team	8f5f2c7	2019-02-15 17:32:18	[diff] [blame]	1849	// Sink sequential loops in 'dstNode' (and thus raise parallel loops)
				1850	// while preserving relative order. This can increase the maximum loop
				1851	// depth at which we can fuse a slice of a producer loop nest into a
				1852	// consumer loop nest.
				1853	sinkSequentialLoops(dstNode);
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	1854
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	1855	SmallVector<Instruction *, 4> loads = dstNode->loads;
				1856	SmallVector<Instruction *, 4> dstLoadOpInsts;
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1857	DenseSet<Value *> visitedMemrefs;
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1858	while (!loads.empty()) {
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1859	// Get memref of load on top of the stack.
				1860	auto *memref = loads.back()->cast<LoadOp>()->getMemRef();
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1861	if (visitedMemrefs.count(memref) > 0)
				1862	continue;
				1863	visitedMemrefs.insert(memref);
MLIR Team	27d067e	2019-01-16 17:55:02	[diff] [blame]	1864	// Move all loads in 'loads' accessing 'memref' to 'dstLoadOpInsts'.
				1865	moveLoadsAccessingMemrefTo(memref, &loads, &dstLoadOpInsts);
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1866	// Skip if no input edges along which to fuse.
				1867	if (mdg->inEdges.count(dstId) == 0)
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	1868	continue;
MLIR Team	1e85191	2019-01-31 00:01:46	[diff] [blame]	1869	// Iterate through in edges for 'dstId' and src node id for any
				1870	// edges on 'memref'.
				1871	SmallVector<unsigned, 2> srcNodeIds;
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1872	for (auto &srcEdge : mdg->inEdges[dstId]) {
				1873	// Skip 'srcEdge' if not for 'memref'.
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	1874	if (srcEdge.value != memref)
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1875	continue;
MLIR Team	1e85191	2019-01-31 00:01:46	[diff] [blame]	1876	srcNodeIds.push_back(srcEdge.id);
				1877	}
				1878	for (unsigned srcId : srcNodeIds) {
				1879	// Skip if this node was removed (fused into another node).
				1880	if (mdg->nodes.count(srcId) == 0)
				1881	continue;
				1882	// Get 'srcNode' from which to attempt fusion into 'dstNode'.
				1883	auto *srcNode = mdg->getNode(srcId);
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1884	// Skip if 'srcNode' is not a loop nest.
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	1885	if (!srcNode->inst->isa<AffineForOp>())
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1886	continue;
MLIR Team	b28009b	2019-01-23 19:11:43	[diff] [blame]	1887	// Skip if 'srcNode' has more than one store to any memref.
				1888	// TODO(andydavis) Support fusing multi-output src loop nests.
				1889	if (srcNode->stores.size() != 1)
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1890	continue;
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1891
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	1892	// Skip 'srcNode' if it has in edges on 'memref'.
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1893	// TODO(andydavis) Track dependence type with edges, and just check
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	1894	// for WAW dependence edge here. Note that this check is overly
				1895	// conservative and will be removed in the future.
				1896	if (mdg->getIncomingMemRefAccesses(srcNode->id, memref) != 0)
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1897	continue;
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1898
MLIR Team	58aa383	2019-02-16 01:12:19	[diff] [blame]	1899	// Skip if 'srcNode' writes to any live in or escaping memrefs,
				1900	// and cannot be fused.
				1901	bool writesToLiveInOrOut =
				1902	mdg->writesToLiveInOrEscapingMemrefs(srcNode->id);
				1903	if (writesToLiveInOrOut &&
				1904	!canFuseSrcWhichWritesToLiveOut(srcId, dstId, memref, mdg))
MLIR Team	d7c8244	2019-01-30 23:53:41	[diff] [blame]	1905	continue;
				1906
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1907	// Skip if 'srcNode' out edge count on 'memref' > 'maxSrcUserCount'.
				1908	if (mdg->getOutEdgeCount(srcNode->id, memref) > maxSrcUserCount)
				1909	continue;
				1910
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	1911	// Compute an instruction list insertion point for the fused loop
				1912	// nest which preserves dependences.
MLIR Team	a78edcd	2019-02-05 14:57:08	[diff] [blame]	1913	Instruction *insertPointInst =
				1914	mdg->getFusedLoopNestInsertionPoint(srcNode->id, dstNode->id);
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	1915	if (insertPointInst == nullptr)
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1916	continue;
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1917
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1918	// Get unique 'srcNode' store op.
Chris Lattner	456ad6a	2018-12-29 00:05:35	[diff] [blame]	1919	auto *srcStoreOpInst = srcNode->stores.front();
MLIR Team	d7c8244	2019-01-30 23:53:41	[diff] [blame]	1920	// Gather 'dstNode' store ops to 'memref'.
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	1921	SmallVector<Instruction *, 2> dstStoreOpInsts;
MLIR Team	d7c8244	2019-01-30 23:53:41	[diff] [blame]	1922	for (auto *storeOpInst : dstNode->stores)
				1923	if (storeOpInst->cast<StoreOp>()->getMemRef() == memref)
				1924	dstStoreOpInsts.push_back(storeOpInst);
				1925
Uday Bondhugula	b4a1443	2019-01-26 00:00:50	[diff] [blame]	1926	unsigned bestDstLoopDepth;
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1927	mlir::ComputationSliceState sliceState;
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	1928	// Check if fusion would be profitable.
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	1929	if (!isFusionProfitable(srcStoreOpInst, srcStoreOpInst,
				1930	dstLoadOpInsts, dstStoreOpInsts, &sliceState,
Uday Bondhugula	ce7e5953	2019-03-08 17:21:52	[diff] [blame]	1931	&bestDstLoopDepth, maximalFusion))
MLIR Team	38c2fe3	2019-01-14 19:26:25	[diff] [blame]	1932	continue;
Uday Bondhugula	864d9e0	2019-01-23 17:16:24	[diff] [blame]	1933
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1934	// Fuse computation slice of 'srcLoopNest' into 'dstLoopNest'.
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	1935	auto sliceLoopNest = mlir::insertBackwardComputationSlice(
Uday Bondhugula	b4a1443	2019-01-26 00:00:50	[diff] [blame]	1936	srcStoreOpInst, dstLoadOpInsts[0], bestDstLoopDepth, &sliceState);
Chris Lattner	d9b5bc8	2019-03-25 02:53:05	[diff] [blame]	1937	if (sliceLoopNest) {
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1938	LLVM_DEBUG(llvm::dbgs() << "\tslice loop nest:\n"
				1939	<< *sliceLoopNest.getInstruction() << "\n");
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	1940	// Move 'dstAffineForOp' before 'insertPointInst' if needed.
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	1941	auto dstAffineForOp = dstNode->inst->cast<AffineForOp>();
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1942	if (insertPointInst != dstAffineForOp.getInstruction()) {
				1943	dstAffineForOp.getInstruction()->moveBefore(insertPointInst);
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	1944	}
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1945	// Update edges between 'srcNode' and 'dstNode'.
MLIR Team	a0f3db40	2019-01-29 17:36:41	[diff] [blame]	1946	mdg->updateEdges(srcNode->id, dstNode->id, memref);
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1947
				1948	// Collect slice loop stats.
				1949	LoopNestStateCollector sliceCollector;
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1950	sliceCollector.collect(sliceLoopNest.getInstruction());
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1951	// Promote single iteration slice loops to single IV value.
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	1952	for (auto forOp : sliceCollector.forOps) {
				1953	promoteIfSingleIteration(forOp);
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	1954	}
MLIR Team	58aa383	2019-02-16 01:12:19	[diff] [blame]	1955	if (!writesToLiveInOrOut) {
				1956	// Create private memref for 'memref' in 'dstAffineForOp'.
				1957	SmallVector<Instruction *, 4> storesForMemref;
				1958	for (auto *storeOpInst : sliceCollector.storeOpInsts) {
				1959	if (storeOpInst->cast<StoreOp>()->getMemRef() == memref)
				1960	storesForMemref.push_back(storeOpInst);
				1961	}
				1962	assert(storesForMemref.size() == 1);
				1963	auto *newMemRef = createPrivateMemRef(
				1964	dstAffineForOp, storesForMemref[0], bestDstLoopDepth,
				1965	fastMemorySpace, localBufSizeThreshold);
				1966	visitedMemrefs.insert(newMemRef);
				1967	// Create new node in dependence graph for 'newMemRef' alloc op.
				1968	unsigned newMemRefNodeId =
				1969	mdg->addNode(newMemRef->getDefiningInst());
				1970	// Add edge from 'newMemRef' node to dstNode.
				1971	mdg->addEdge(newMemRefNodeId, dstId, newMemRef);
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1972	}
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1973
				1974	// Collect dst loop stats after memref privatizaton transformation.
				1975	LoopNestStateCollector dstLoopCollector;
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	1976	dstLoopCollector.collect(dstAffineForOp.getInstruction());
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1977
				1978	// Add new load ops to current Node load op list 'loads' to
				1979	// continue fusing based on new operands.
				1980	for (auto *loadOpInst : dstLoopCollector.loadOpInsts) {
				1981	auto *loadMemRef = loadOpInst->cast<LoadOp>()->getMemRef();
				1982	if (visitedMemrefs.count(loadMemRef) == 0)
				1983	loads.push_back(loadOpInst);
				1984	}
				1985
				1986	// Clear and add back loads and stores
				1987	mdg->clearNodeLoadAndStores(dstNode->id);
				1988	mdg->addToNode(dstId, dstLoopCollector.loadOpInsts,
				1989	dstLoopCollector.storeOpInsts);
MLIR Team	71495d5	2019-01-22 21:23:37	[diff] [blame]	1990	// Remove old src loop nest if it no longer has outgoing dependence
				1991	// edges, and it does not write to a memref which escapes the
MLIR Team	58aa383	2019-02-16 01:12:19	[diff] [blame]	1992	// function. If 'writesToLiveInOrOut' is true, then 'srcNode' has
				1993	// been fused into 'dstNode' and write region of 'dstNode' covers
				1994	// the write region of 'srcNode', and 'srcNode' has no other users
				1995	// so it is safe to remove.
				1996	if (writesToLiveInOrOut \|\| mdg->canRemoveNode(srcNode->id)) {
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	1997	mdg->removeNode(srcNode->id);
River Riddle	5052bd8	2019-02-02 00:42:18	[diff] [blame]	1998	srcNode->inst->erase();
MLIR Team	a78edcd	2019-02-05 14:57:08	[diff] [blame]	1999	} else {
				2000	// Add remaining users of 'oldMemRef' back on the worklist (if not
				2001	// already there), as its replacement with a local/private memref
				2002	// has reduced dependences on 'oldMemRef' which may have created
				2003	// new fusion opportunities.
				2004	if (mdg->outEdges.count(srcNode->id) > 0) {
				2005	SmallVector<MemRefDependenceGraph::Edge, 2> oldOutEdges =
				2006	mdg->outEdges[srcNode->id];
				2007	for (auto &outEdge : oldOutEdges) {
				2008	if (outEdge.value == memref &&
				2009	worklistSet.count(outEdge.id) == 0) {
				2010	worklist.push_back(outEdge.id);
				2011	worklistSet.insert(outEdge.id);
				2012	}
				2013	}
				2014	}
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	2015	}
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	2016	}
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	2017	}
				2018	}
				2019	}
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	2020	}
				2021
				2022	// Visits each node in the graph, and for each node, attempts to fuse it with
				2023	// its sibling nodes (nodes which share a parent, but no dependence edges).
				2024	void fuseSiblingNodes() {
				2025	init();
				2026	while (!worklist.empty()) {
				2027	unsigned dstId = worklist.back();
				2028	worklist.pop_back();
				2029	worklistSet.erase(dstId);
				2030
				2031	// Skip if this node was removed (fused into another node).
				2032	if (mdg->nodes.count(dstId) == 0)
				2033	continue;
				2034	// Get 'dstNode' into which to attempt fusion.
				2035	auto *dstNode = mdg->getNode(dstId);
				2036	// Skip if 'dstNode' is not a loop nest.
				2037	if (!dstNode->inst->isa<AffineForOp>())
				2038	continue;
				2039	// Attempt to fuse 'dstNode' with its sibling nodes in the graph.
				2040	fuseWithSiblingNodes(dstNode);
				2041	}
				2042	}
				2043
				2044	// Attempt to fuse 'dstNode' with sibling nodes in the graph.
				2045	void fuseWithSiblingNodes(Node *dstNode) {
				2046	DenseSet<unsigned> visitedSibNodeIds;
				2047	std::pair<unsigned, Value *> idAndMemref;
				2048	while (findSiblingNodeToFuse(dstNode, &visitedSibNodeIds, &idAndMemref)) {
				2049	unsigned sibId = idAndMemref.first;
				2050	Value *memref = idAndMemref.second;
				2051	// TODO(andydavis) Check that 'sibStoreOpInst' post-dominates all other
				2052	// stores to the same memref in 'sibNode' loop nest.
				2053	auto *sibNode = mdg->getNode(sibId);
				2054	// Compute an instruction list insertion point for the fused loop
				2055	// nest which preserves dependences.
				2056	assert(sibNode->inst->getBlock() == dstNode->inst->getBlock());
				2057	Instruction *insertPointInst =
				2058	sibNode->inst->isBeforeInBlock(dstNode->inst)
				2059	? mdg->getFusedLoopNestInsertionPoint(sibNode->id, dstNode->id)
				2060	: mdg->getFusedLoopNestInsertionPoint(dstNode->id, sibNode->id);
				2061	if (insertPointInst == nullptr)
				2062	continue;
				2063
				2064	// Check if fusion would be profitable and at what depth.
				2065
				2066	// Get unique 'sibNode' load op to 'memref'.
				2067	SmallVector<Instruction *, 2> sibLoadOpInsts;
				2068	sibNode->getLoadOpsForMemref(memref, &sibLoadOpInsts);
				2069	// Currently findSiblingNodeToFuse searches for siblings with one load.
				2070	assert(sibLoadOpInsts.size() == 1);
				2071	Instruction *sibLoadOpInst = sibLoadOpInsts[0];
				2072	assert(!sibNode->stores.empty());
				2073	// TODO(andydavis) Choose the store which postdominates all other stores.
				2074	auto *sibStoreOpInst = sibNode->stores.back();
				2075
				2076	// Gather 'dstNode' load ops to 'memref'.
				2077	SmallVector<Instruction *, 2> dstLoadOpInsts;
				2078	dstNode->getLoadOpsForMemref(memref, &dstLoadOpInsts);
				2079
				2080	// Gather 'dstNode' store ops to 'memref'.
				2081	SmallVector<Instruction *, 2> dstStoreOpInsts;
				2082	dstNode->getStoreOpsForMemref(memref, &dstStoreOpInsts);
				2083
				2084	unsigned bestDstLoopDepth;
				2085	mlir::ComputationSliceState sliceState;
				2086
				2087	// Check if fusion would be profitable.
				2088	if (!isFusionProfitable(sibLoadOpInst, sibStoreOpInst, dstLoadOpInsts,
Uday Bondhugula	ce7e5953	2019-03-08 17:21:52	[diff] [blame]	2089	dstStoreOpInsts, &sliceState, &bestDstLoopDepth,
				2090	maximalFusion))
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	2091	continue;
				2092
				2093	// Fuse computation slice of 'sibLoopNest' into 'dstLoopNest'.
				2094	auto sliceLoopNest = mlir::insertBackwardComputationSlice(
				2095	sibLoadOpInst, dstLoadOpInsts[0], bestDstLoopDepth, &sliceState);
				2096	if (sliceLoopNest != nullptr) {
				2097	auto dstForInst = dstNode->inst->cast<AffineForOp>();
				2098	// Update instruction position of fused loop nest (if needed).
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	2099	if (insertPointInst != dstForInst.getInstruction()) {
				2100	dstForInst.getInstruction()->moveBefore(insertPointInst);
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	2101	}
				2102	// Update data dependence graph state post fusion.
				2103	updateStateAfterSiblingFusion(sliceLoopNest, sibNode, dstNode);
				2104	}
				2105	}
				2106	}
				2107
				2108	// Searches the graph from 'dstNode' looking for a fusion candidate sibling
				2109	// node which shares no dependences with 'dstNode' but which loads from the
				2110	// same memref. Returns true and sets 'idAndMemrefToFuse' on success. Returns
				2111	// false otherwise.
				2112	bool findSiblingNodeToFuse(Node *dstNode,
				2113	DenseSet<unsigned> *visitedSibNodeIds,
				2114	std::pair<unsigned, Value > idAndMemrefToFuse) {
				2115	// TODO(andydavis) Currently we discover siblings by following edges
				2116	// through an intermediate src node. We should also consider siblings
				2117	// which load from the same memref, but which do not necessarily share
				2118	// a src node parent (e.g. loading from a memref which is a function arg).
				2119	// Collect candidate 'dstNode' input edges in 'inEdges'.
				2120	SmallVector<MemRefDependenceGraph::Edge, 2> inEdges;
				2121	mdg->forEachMemRefInputEdge(
				2122	dstNode->id, [&](MemRefDependenceGraph::Edge inEdge) {
				2123	// Add 'inEdge' if it is a read-after-write dependence.
				2124	if (dstNode->getLoadOpCount(inEdge.value) > 0 &&
				2125	mdg->getNode(inEdge.id)->getStoreOpCount(inEdge.value) > 0)
				2126	inEdges.push_back(inEdge);
				2127	});
				2128
				2129	// Search for sibling nodes to fuse by visiting output edges from each input
				2130	// edge in 'inEdges'.
				2131	for (auto &inEdge : inEdges) {
				2132	// Collect candidate output edges from each node 'inEdge.id' in 'inEdges'.
				2133	SmallVector<MemRefDependenceGraph::Edge, 2> outEdges;
				2134	mdg->forEachMemRefOutputEdge(
				2135	inEdge.id, [&](MemRefDependenceGraph::Edge outEdge) {
				2136	unsigned sibNodeId = outEdge.id;
				2137	if (visitedSibNodeIds->count(sibNodeId) > 0)
				2138	return;
				2139	// Skip output edge if not a sibling using the same memref.
				2140	if (outEdge.id == dstNode->id \|\| outEdge.value != inEdge.value)
				2141	return;
				2142	auto *sibNode = mdg->getNode(sibNodeId);
				2143	if (!sibNode->inst->isa<AffineForOp>())
				2144	return;
				2145	// Skip if 'outEdge' is not a read-after-write dependence.
				2146	// TODO(andydavis) Remove restrict to single load op restriction.
				2147	if (sibNode->getLoadOpCount(inEdge.value) != 1)
				2148	return;
				2149	// Skip if there exists a path of dependent edges between
				2150	// 'sibNode' and 'dstNode'.
				2151	if (mdg->hasDependencePath(sibNodeId, dstNode->id) \|\|
				2152	mdg->hasDependencePath(dstNode->id, sibNodeId))
				2153	return;
				2154	// Skip sib node if it loads to (and stores from) the same memref on
				2155	// which it also has an input dependence edge.
				2156	DenseSet<Value *> loadAndStoreMemrefSet;
				2157	sibNode->getLoadAndStoreMemrefSet(&loadAndStoreMemrefSet);
				2158	if (llvm::any_of(loadAndStoreMemrefSet, [=](Value *memref) {
				2159	return mdg->getIncomingMemRefAccesses(sibNode->id, memref) >
				2160	0;
				2161	}))
				2162	return;
				2163	// Check that all stores are to the same memref.
				2164	DenseSet<Value *> storeMemrefs;
				2165	for (auto *storeOpInst : sibNode->stores) {
				2166	storeMemrefs.insert(storeOpInst->cast<StoreOp>()->getMemRef());
				2167	}
				2168	if (storeMemrefs.size() != 1)
				2169	return;
				2170	// Add candidate 'outEdge' to sibling node.
				2171	outEdges.push_back(outEdge);
				2172	});
				2173
				2174	// Add first candidate if any were returned.
				2175	if (!outEdges.empty()) {
				2176	visitedSibNodeIds->insert(outEdges[0].id);
				2177	idAndMemrefToFuse->first = outEdges[0].id;
				2178	idAndMemrefToFuse->second = outEdges[0].value;
				2179	return true;
				2180	}
				2181	}
				2182	return false;
				2183	}
				2184
Chris Lattner	d9b5bc8	2019-03-25 02:53:05	[diff] [blame]	2185	void updateStateAfterSiblingFusion(AffineForOp sliceLoopNest, Node *sibNode,
				2186	Node *dstNode) {
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	2187	// Update 'sibNode' and 'dstNode' input/output edges to reflect fusion.
				2188	mdg->updateEdges(sibNode->id, dstNode->id);
				2189
				2190	// Collect slice loop stats.
				2191	LoopNestStateCollector sliceCollector;
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	2192	sliceCollector.collect(sliceLoopNest.getInstruction());
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	2193	// Promote single iteration slice loops to single IV value.
				2194	for (auto forOp : sliceCollector.forOps) {
				2195	promoteIfSingleIteration(forOp);
				2196	}
				2197
				2198	// Collect dst loop stats after memref privatizaton transformation.
				2199	auto dstForInst = dstNode->inst->cast<AffineForOp>();
				2200	LoopNestStateCollector dstLoopCollector;
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	2201	dstLoopCollector.collect(dstForInst.getInstruction());
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	2202	// Clear and add back loads and stores
				2203	mdg->clearNodeLoadAndStores(dstNode->id);
				2204	mdg->addToNode(dstNode->id, dstLoopCollector.loadOpInsts,
				2205	dstLoopCollector.storeOpInsts);
				2206	// Remove old sibling loop nest if it no longer has outgoing dependence
				2207	// edges, and it does not write to a memref which escapes the
				2208	// function.
				2209	if (mdg->getOutEdgeCount(sibNode->id) == 0) {
				2210	mdg->removeNode(sibNode->id);
River Riddle	af1abcc	2019-03-25 18:13:31	[diff] [blame^]	2211	sibNode->inst->cast<AffineForOp>().erase();
MLIR Team	d038e34	2019-03-01 19:50:25	[diff] [blame]	2212	}
				2213	}
				2214
				2215	// Clean up any allocs with no users.
				2216	void eraseUnusedMemRefAllocations() {
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	2217	for (auto &pair : mdg->memrefEdgeCount) {
				2218	if (pair.second > 0)
				2219	continue;
				2220	auto *memref = pair.first;
MLIR Team	71495d5	2019-01-22 21:23:37	[diff] [blame]	2221	// Skip if there exist other uses (return instruction or function calls).
				2222	if (!memref->use_empty())
				2223	continue;
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	2224	// Use list expected to match the dep graph info.
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	2225	auto *inst = memref->getDefiningInst();
River Riddle	b499277	2019-02-04 18:38:47	[diff] [blame]	2226	if (inst && inst->isa<AllocOp>())
				2227	inst->erase();
MLIR Team	c4237ae	2019-01-18 16:56:27	[diff] [blame]	2228	}
MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	2229	}
MLIR Team	3b69230	2018-12-17 17:57:14	[diff] [blame]	2230	};
				2231
				2232	} // end anonymous namespace
MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	2233
River Riddle	ed5fe20	2019-02-28 22:50:42	[diff] [blame]	2234	void LoopFusion::runOnFunction() {
Uday Bondhugula	d4b3ff1	2019-02-27 00:10:19	[diff] [blame]	2235	// Override if a command line argument was provided.
Uday Bondhugula	8be2627	2019-02-02 01:06:22	[diff] [blame]	2236	if (clFusionFastMemorySpace.getNumOccurrences() > 0) {
				2237	fastMemorySpace = clFusionFastMemorySpace.getValue();
				2238	}
				2239
Uday Bondhugula	d4b3ff1	2019-02-27 00:10:19	[diff] [blame]	2240	// Override if a command line argument was provided.
				2241	if (clFusionLocalBufThreshold.getNumOccurrences() > 0) {
				2242	localBufSizeThreshold = clFusionLocalBufThreshold * 1024;
				2243	}
				2244
Uday Bondhugula	ce7e5953	2019-03-08 17:21:52	[diff] [blame]	2245	if (clMaximalLoopFusion.getNumOccurrences() > 0)
				2246	maximalFusion = clMaximalLoopFusion;
				2247
MLIR Team	6892ffb	2018-12-20 04:42:55	[diff] [blame]	2248	MemRefDependenceGraph g;
Uday Bondhugula	02af8c2	2019-03-05 23:05:34	[diff] [blame]	2249	if (g.init(getFunction()))
Uday Bondhugula	ce7e5953	2019-03-08 17:21:52	[diff] [blame]	2250	GreedyFusion(&g, localBufSizeThreshold, fastMemorySpace, maximalFusion)
				2251	.run();
MLIR Team	f28e4df	2018-11-01 14:26:00	[diff] [blame]	2252	}
Jacques Pienaar	6f0fb22	2018-11-07 02:34:18	[diff] [blame]	2253
				2254	static PassRegistration<LoopFusion> pass("loop-fusion", "Fuse loop nests");