Skip to content

Commit f7c12f7

Browse files
authored
[cuebot] Introduce depend.satisfy_only_on_frame_success setting. (#1082)
1 parent 310af8a commit f7c12f7

File tree

5 files changed

+173
-12
lines changed

5 files changed

+173
-12
lines changed

cuebot/src/main/java/com/imageworks/spcue/dispatcher/FrameCompleteHandler.java

+38-10
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
import java.util.concurrent.atomic.AtomicLong;
2626

2727
import org.apache.log4j.Logger;
28+
import org.springframework.beans.factory.annotation.Autowired;
29+
import org.springframework.core.env.Environment;
2830
import org.springframework.dao.EmptyResultDataAccessException;
2931

3032
import com.imageworks.spcue.DispatchFrame;
@@ -92,6 +94,25 @@ public class FrameCompleteHandler {
9294
*/
9395
private boolean shutdown = false;
9496

97+
/**
98+
* Whether to satisfy dependents (*_ON_FRAME and *_ON_LAYER) only when a frame succeeds. When false, an EATEN frame also satisfies them.
99+
*/
100+
private boolean satisfyDependOnlyOnFrameSuccess;
101+
102+
public boolean getSatisfyDependOnlyOnFrameSuccess() {
103+
return satisfyDependOnlyOnFrameSuccess;
104+
}
105+
106+
public void setSatisfyDependOnlyOnFrameSuccess(boolean satisfyDependOnlyOnFrameSuccess) {
107+
this.satisfyDependOnlyOnFrameSuccess = satisfyDependOnlyOnFrameSuccess;
108+
}
109+
110+
@Autowired
111+
public FrameCompleteHandler(Environment env) {
112+
satisfyDependOnlyOnFrameSuccess = env.getProperty(
113+
"depend.satisfy_only_on_frame_success", Boolean.class, true);
114+
}
115+
95116
/**
96117
* Handle the given FrameCompleteReport from RQD.
97118
*
@@ -235,21 +256,28 @@ public void handlePostFrameCompleteOperations(VirtualProc proc,
235256

236257
dispatchSupport.updateUsageCounters(frame, report.getExitStatus());
237258

238-
if (newFrameState.equals(FrameState.SUCCEEDED)) {
259+
boolean isLayerComplete = false;
260+
261+
if (newFrameState.equals(FrameState.SUCCEEDED)
262+
|| (!satisfyDependOnlyOnFrameSuccess
263+
&& newFrameState.equals(FrameState.EATEN))) {
239264
jobManagerSupport.satisfyWhatDependsOn(frame);
240-
if (jobManager.isLayerComplete(frame)) {
265+
isLayerComplete = jobManager.isLayerComplete(frame);
266+
if (isLayerComplete) {
241267
jobManagerSupport.satisfyWhatDependsOn((LayerInterface) frame);
242-
} else {
243-
/*
244-
* If the layer meets some specific criteria then try to
245-
* update the minimum memory and tags so it can run on a
246-
* wider variety of cores, namely older hardware.
247-
*/
248-
jobManager.optimizeLayer(frame, report.getFrame().getNumCores(),
249-
report.getFrame().getMaxRss(), report.getRunTime());
250268
}
251269
}
252270

271+
if (newFrameState.equals(FrameState.SUCCEEDED) && !isLayerComplete) {
272+
/*
273+
* If the layer meets some specific criteria then try to
274+
* update the minimum memory and tags so it can run on a
275+
* wider variety of cores, namely older hardware.
276+
*/
277+
jobManager.optimizeLayer(frame, report.getFrame().getNumCores(),
278+
report.getFrame().getMaxRss(), report.getRunTime());
279+
}
280+
253281
/*
254282
* The final frame can either be Succeeded or Eaten. If you only
255283
* check if the frame is Succeeded before doing an isJobComplete

cuebot/src/main/resources/opencue.properties

+3
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,9 @@ dispatcher.booking_queue.max_pool_size=6
8888
# Queue capacity for booking.
8989
dispatcher.booking_queue.queue_capacity=1000
9090

91+
# Whether to satisfy dependents (*_ON_FRAME and *_ON_LAYER) only when a frame succeeds. If false, eaten frames satisfy them as well.
92+
depend.satisfy_only_on_frame_success=true
93+
9194
# Jobs will be archived to the history tables after being completed for this long.
9295
history.archive_jobs_cutoff_hours=72
9396

cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/FrameCompleteHandlerTests.java

+96-1
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,23 @@
2727
import org.junit.Test;
2828
import org.springframework.test.annotation.Rollback;
2929
import org.springframework.test.context.ContextConfiguration;
30+
import org.springframework.transaction.annotation.Propagation;
3031
import org.springframework.transaction.annotation.Transactional;
3132

33+
import com.imageworks.spcue.DispatchFrame;
3234
import com.imageworks.spcue.DispatchHost;
33-
import com.imageworks.spcue.FrameInterface;
35+
import com.imageworks.spcue.DispatchJob;
36+
import com.imageworks.spcue.FrameDetail;
3437
import com.imageworks.spcue.JobDetail;
3538
import com.imageworks.spcue.LayerDetail;
3639
import com.imageworks.spcue.VirtualProc;
40+
import com.imageworks.spcue.dao.FrameDao;
3741
import com.imageworks.spcue.dao.LayerDao;
3842
import com.imageworks.spcue.dispatcher.Dispatcher;
43+
import com.imageworks.spcue.dispatcher.DispatchSupport;
3944
import com.imageworks.spcue.dispatcher.FrameCompleteHandler;
4045
import com.imageworks.spcue.grpc.host.HardwareState;
46+
import com.imageworks.spcue.grpc.job.FrameState;
4147
import com.imageworks.spcue.grpc.report.FrameCompleteReport;
4248
import com.imageworks.spcue.grpc.report.RenderHost;
4349
import com.imageworks.spcue.grpc.report.RunningFrameInfo;
@@ -70,12 +76,18 @@ public class FrameCompleteHandlerTests extends TransactionalTest {
7076
@Resource
7177
JobManager jobManager;
7278

79+
@Resource
80+
FrameDao frameDao;
81+
7382
@Resource
7483
LayerDao layerDao;
7584

7685
@Resource
7786
Dispatcher dispatcher;
7887

88+
@Resource
89+
DispatchSupport dispatchSupport;
90+
7991
private static final String HOSTNAME = "beta";
8092

8193
@Before
@@ -232,5 +244,88 @@ public void testGpuReportOver() {
232244
(jobManager.isJobComplete(job1) ? 1 : 0) +
233245
(jobManager.isJobComplete(job2) ? 1 : 0));
234246
}
247+
248+
private void executeDepend(
249+
FrameState frameState, int exitStatus, int dependCount, FrameState dependState) {
250+
JobDetail job = jobManager.findJobDetail("pipe-default-testuser_test_depend");
251+
LayerDetail layerFirst = layerDao.findLayerDetail(job, "layer_first");
252+
LayerDetail layerSecond = layerDao.findLayerDetail(job, "layer_second");
253+
FrameDetail frameFirst = frameDao.findFrameDetail(job, "0000-layer_first");
254+
FrameDetail frameSecond = frameDao.findFrameDetail(job, "0000-layer_second");
255+
256+
assertEquals(1, frameSecond.dependCount);
257+
assertEquals(FrameState.DEPEND, frameSecond.state);
258+
259+
jobManager.setJobPaused(job, false);
260+
261+
DispatchHost host = getHost();
262+
List<VirtualProc> procs = dispatcher.dispatchHost(host);
263+
assertEquals(1, procs.size());
264+
VirtualProc proc = procs.get(0);
265+
assertEquals(job.getId(), proc.getJobId());
266+
assertEquals(layerFirst.getId(), proc.getLayerId());
267+
assertEquals(frameFirst.getId(), proc.getFrameId());
268+
269+
RunningFrameInfo info = RunningFrameInfo.newBuilder()
270+
.setJobId(proc.getJobId())
271+
.setLayerId(proc.getLayerId())
272+
.setFrameId(proc.getFrameId())
273+
.setResourceId(proc.getProcId())
274+
.build();
275+
FrameCompleteReport report = FrameCompleteReport.newBuilder()
276+
.setFrame(info)
277+
.setExitStatus(exitStatus)
278+
.build();
279+
280+
DispatchJob dispatchJob = jobManager.getDispatchJob(proc.getJobId());
281+
DispatchFrame dispatchFrame = jobManager.getDispatchFrame(report.getFrame().getFrameId());
282+
dispatchSupport.stopFrame(dispatchFrame, frameState, report.getExitStatus(),
283+
report.getFrame().getMaxRss());
284+
frameCompleteHandler.handlePostFrameCompleteOperations(proc,
285+
report, dispatchJob, dispatchFrame, frameState);
286+
287+
assertTrue(jobManager.isLayerComplete(layerFirst));
288+
assertFalse(jobManager.isLayerComplete(layerSecond));
289+
290+
frameSecond = frameDao.findFrameDetail(job, "0000-layer_second");
291+
assertEquals(dependCount, frameSecond.dependCount);
292+
assertEquals(dependState, frameSecond.state);
293+
}
294+
295+
@Test
296+
@Transactional
297+
@Rollback(true)
298+
public void testDependOnSuccess() {
299+
assertTrue(frameCompleteHandler.getSatisfyDependOnlyOnFrameSuccess());
300+
executeDepend(FrameState.SUCCEEDED, 0, 0, FrameState.WAITING);
301+
}
302+
303+
@Test
304+
@Transactional
305+
@Rollback(true)
306+
public void testDependOnFailure() {
307+
assertTrue(frameCompleteHandler.getSatisfyDependOnlyOnFrameSuccess());
308+
executeDepend(FrameState.EATEN, -1, 1, FrameState.DEPEND);
309+
}
310+
311+
@Test
312+
@Transactional
313+
@Rollback(true)
314+
public void testDependOnSuccessSatifyOnAny() {
315+
frameCompleteHandler.setSatisfyDependOnlyOnFrameSuccess(false);
316+
assertFalse(frameCompleteHandler.getSatisfyDependOnlyOnFrameSuccess());
317+
executeDepend(FrameState.SUCCEEDED, 0, 0, FrameState.WAITING);
318+
frameCompleteHandler.setSatisfyDependOnlyOnFrameSuccess(true);
319+
}
320+
321+
@Test
322+
@Transactional
323+
@Rollback(true)
324+
public void testDependOnFailureSatisfyOnAny() {
325+
frameCompleteHandler.setSatisfyDependOnlyOnFrameSuccess(false);
326+
assertFalse(frameCompleteHandler.getSatisfyDependOnlyOnFrameSuccess());
327+
executeDepend(FrameState.EATEN, -1, 0, FrameState.WAITING);
328+
frameCompleteHandler.setSatisfyDependOnlyOnFrameSuccess(true);
329+
}
235330
}
236331

cuebot/src/test/java/com/imageworks/spcue/test/dispatcher/HistoryControlTests.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ public void testEnabled() {
169169

170170
launchAndDeleteJob();
171171

172-
assertEquals(Integer.valueOf(3), jdbcTemplate.queryForObject(
172+
assertEquals(Integer.valueOf(4), jdbcTemplate.queryForObject(
173173
"SELECT COUNT(*) FROM job_history", Integer.class));
174174
assertEquals(Integer.valueOf(1), jdbcTemplate.queryForObject(
175175
"SELECT COUNT(*) FROM frame_history", Integer.class));

cuebot/src/test/resources/conf/jobspec/jobspec_gpus_test.xml

+35
Original file line numberDiff line numberDiff line change
@@ -73,4 +73,39 @@
7373
</layer>
7474
</layers>
7575
</job>
76+
77+
<job name="test_depend">
78+
<paused>True</paused>
79+
<layers>
80+
<layer name="layer_first" type="Render">
81+
<cmd>true</cmd>
82+
<range>0</range>
83+
<chunk>1</chunk>
84+
<gpus>1</gpus>
85+
<gpu_memory>1</gpu_memory>
86+
<services>
87+
<service>shell</service>
88+
</services>
89+
</layer>
90+
<layer name="layer_second" type="Render">
91+
<cmd>true</cmd>
92+
<range>0</range>
93+
<chunk>1</chunk>
94+
<gpus>1</gpus>
95+
<gpu_memory>1</gpu_memory>
96+
<services>
97+
<service>shell</service>
98+
</services>
99+
</layer>
100+
</layers>
101+
</job>
102+
103+
<depends>
104+
<depend type="LAYER_ON_LAYER" anyframe="False">
105+
<depjob>test_depend</depjob>
106+
<deplayer>layer_second</deplayer>
107+
<onjob>test_depend</onjob>
108+
<onlayer>layer_first</onlayer>
109+
</depend>
110+
</depends>
76111
</spec>

0 commit comments

Comments
 (0)