ArrayFire Graphics
A tutorial
by Chris McClanahan, GPU Engineer
Overview
Introduction to ArrayFire
Introduction to ArrayFire Graphics
Optical Flow
Optical Flow GFX Example / Demo
ArrayFire Introduction
Matrix Types
f64
f32 array b8
c32
c64
real double precision
real single precision
boolean byte
container type
complex single precision
complex double precision
Matrix Types: ND Support
f64
real double precision
vectors
f32
matrices
b8
boolean byte
real single precision
volumes
c32
complex single precision
c64
complex double precision
ND
Matrix Types: Easy Manipulation
f64
real double precision
ArrayFire Keywords: end, span
A(1,1)
A(1,span)
A(span,span,2)
b8
f32
boolean byte
real single precision
A(end,1)
A(end,span)
c32
complex single precision
c64
complex double precision
Easy GPU Acceleration in C++
#include <stdio.h>
#include <arrayfire.h>
using namespace af;
int main() {
// 20 million random samples
int n = 20e6;
array x = randu(n,1), y = randu(n,1);
// how many fell inside unit circle?
float pi = 4 * sum<float>(sqrt(mul(x,x)+mul(y,y))<1) / n;
printf("pi = %g\n", pi);
return 0;
}
Easy GPU Acceleration in C++
#include <stdio.h>
#include <arrayfire.h>
using namespace af;
int main() {
// 20 million random samples
int n = 20e6;
array x = randu(n,1), y = randu(n,1);
// how many fell inside unit circle?
float pi = 4 * sum<float>(sqrt(mul(x,x)+mul(y,y))<1) / n;
printf("pi = %g\n", pi);
return 0;
}
Easy GPU Acceleration in C++
#include <stdio.h>
#include <arrayfire.h>
using namespace af;
int main() {
// 20 million random samples
int n = 20e6;
array x = randu(n,1), y = randu(n,1);
// how many fell inside unit circle?
float pi = 4 * sum<float>(sqrt(mul(x,x)+mul(y,y))<1) / n;
printf("pi = %g\n", pi);
return 0;
On GPU
}
Easy GPU Acceleration in C++
#include <stdio.h>
#include <arrayfire.h>
using namespace af;
int main() {
// 20 million random samples
int n = 20e6;
array x = randu(n,1), y = randu(n,1);
// how many fell inside unit circle?
float pi = 4 * sum<float>(sqrt(mul(x,x)+mul(y,y))<1) / n;
printf("pi = %g\n", pi);
return 0;
On GPU
}
ArrayFire Graphics Introduction
Coupled Compute and GFX
Graphics were designed to be as unobtrusive
as possible on GPU compute
Graphics rendering completed in separate worker
thread
Most graphics commands from compute thread
non-blocking and lazy
Graphics commands designed to be as simple
as possible
ArrayFire Graphics Example
#include <stdio.h>
#include <arrayfire.h>
using namespace af;
int main() {
    // Each random surface is side x side values
    const int side = 256;
    // Produce an endless stream of random 3d surfaces
    for (;;) {
        array x = randu(side, side);
        // Show x as a 3d surface plot
        plot3d(x);
    }
    return 0;
}
DEMO
ArrayFire Graphics Example
#include <stdio.h>
#include <arrayfire.h>
using namespace af;
int main() {
// Infinite number of random 3d surfaces
const int n = 256;
GPU generates random
while (1) {
array x = randu(n,n);
numbers in compute
// 3d surface plot
thread
plot3d(x);
}
return 0;
}
ArrayFire Graphics Example
#include <stdio.h>
#include <arrayfire.h>
using namespace af;
int main() {
// Infinite number of random 3d surfaces
const int n = 256;
while (1) {
Data from x is
array x = randu(n,n);
transferred to OpenGL
// 3d surface plot
plot3d(x);
and drawing is queued
}
in newly created render
return 0;
thread
}
ArrayFire Graphics Example
#include <stdio.h>
#include <arrayfire.h>
using namespace af;
int main() {
// Infinite number of random 3d surfaces
const int n = 256;
while (1) {
array x = randu(n,n);
Drawing done in render
// 3d surface plot
plot3d(x);
thread at 35Hz; some
}
data dropped
return 0;
}
ArrayFire Graphics Implementation
ArrayFire
Instantiation
GFX (C++)
Lua
Transfer of CUDA data
to OpenGL
Data Draw Requests
Wake and
Consumption
Of Draw Requests
At 35Hz
Compute Thread
Render Thread
ArrayFire Graphics Implementation
Render thread hybrid C++/Lua
C++
Wake / Sleep Loop
OpenGL Memory Management
GFX (C++)
Lua
Lua
Port of OpenGL API to Lua
(independent AccelerEyes effort)
All graphics primitives / drawing logic
Wake and
Consumption
Of Draw Requests
At 35Hz
Render Thread
ArrayFire Graphics Implementation
Lua interface to be opened up
at a later date to end users
Custom Graphics Primitives
Modification of Existing GFX
system
Easily couple visualization with
compute in a platform
independent environment
GFX (C++)
Lua
Wake and
Consumption
Of Draw Requests
At 35Hz
Render Thread
Graphics Commands
Available primitives (non-blocking)
plot3d: 3d surface plotting (2d data)
plot: 2d line plotting
imgplot: intensity image visualization
arrows: quiver plot for vector fields
points: scatter plot
volume: volume rendering for 3d data
rgbplot: color image visualization
DEMO
Graphics Commands
Utility Commands (blocking unless otherwise
stated)
keep_on / keep_off
subfigure
palette
clearfig
draw (blocking)
figure
title
close
DEMO
Optical Flow (Horn-Schunck)
Horn-Schunck Background
Compute apparent motion between two
images
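Minimize the following functional,
$$E(u, v) = \iint \left[ (I_x u + I_y v + I_t)^2 + \alpha^2 \left( \lVert \nabla u \rVert^2 + \lVert \nabla v \rVert^2 \right) \right] dx\,dy$$
By solving,
$$I_x (I_x u + I_y v + I_t) - \alpha^2 \Delta u = 0$$
$$I_y (I_x u + I_y v + I_t) - \alpha^2 \Delta v = 0$$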
Horn-Schunck Background
Can be accomplished with ArrayFire
Relatively small amount of code
Iterative Implementation on GPU via gradient
descent
Easily couple compute with visualization via basic
graphics commands
ArrayFire Implementation
Main algorithm loop:
array u = zeros(I1.dims()), v = zeros(I1.dims());
while (true) {
iter++;
array u_ = convolve(u, avg_kernel, afConvSame);
array v_ = convolve(v, avg_kernel, afConvSame);
const float alphasq = 0.1f;
array num = mul(Ix, u_) + mul(Iy, v_) + It;
array den = alphasq + mul(Ix, Ix) + mul(Iy, Iy);
array rsc = 0.01f * num;
u = u_ - mul(Ix, rsc) / den;
v = v_ - mul(Iy, rsc) / den;
}
ArrayFire Implementation
Main algorithm loop:
Initial flow field we are
for (on GPU)
array u = zeros(I1.dims()), v = zeros(I1.dims());
solving
while (true) {
iter++;
array u_ = convolve(u, avg_kernel, afConvSame);
array v_ = convolve(v, avg_kernel, afConvSame);
const float alphasq = 0.1f;
array num = mul(Ix, u_) + mul(Iy, v_) + It;
array den = alphasq + mul(Ix, Ix) + mul(Iy, Iy);
array rsc = 0.01f * num;
u = u_ - mul(Ix, rsc) / den;
v = v_ - mul(Iy, rsc) / den;
}
ArrayFire Implementation
Main algorithm loop:
array u = zeros(I1.dims()), v = zeros(I1.dims());
while (true) {
iter++;
array u_ = convolve(u, avg_kernel, afConvSame);
array v_ = convolve(v, avg_kernel, afConvSame);
const float alphasq = 0.1f;
array num = mul(Ix, u_) + mul(Iy, v_) + It;
array den = alphasq + mul(Ix, Ix) + mul(Iy, Iy);
array rsc = 0.01f * num;
u = u_ - mul(Ix, rsc) / den;
v = v_ - mul(Iy, rsc) / den;
}
Blur current field (on
GPU)
ArrayFire Implementation
Main algorithm loop:
array u = zeros(I1.dims()), v = zeros(I1.dims());
while (true) {
iter++;
array u_ = convolve(u, avg_kernel, afConvSame);
array v_ = convolve(v, avg_kernel, afConvSame);
const float alphasq = 0.1f;
array num = mul(Ix, u_) + mul(Iy, v_) + It;
array den = alphasq + mul(Ix, Ix) + mul(Iy,
array rsc = 0.01f * num;
u = u_ - mul(Ix, rsc) / den;
v = v_ - mul(Iy, rsc) / den;
}
Direction for gradient
descent (on GPU). Ix,
Iy, It are image
Iy); derivatives computed
by diffs() routine.
(See code)
ArrayFire Implementation
Main algorithm loop:
array u = zeros(I1.dims()), v = zeros(I1.dims());
while (true) {
iter++;
array u_ = convolve(u, avg_kernel, afConvSame);
array v_ = convolve(v, avg_kernel, afConvSame);
const float alphasq = 0.1f;
array num = mul(Ix, u_) + mul(Iy, v_) + It;
array den = alphasq + mul(Ix, Ix) + mul(Iy, Iy);
array rsc = 0.01f * num;
u = u_ - mul(Ix, rsc) / den;
v = v_ - mul(Iy, rsc) / den;
}
Scale and apply
gradient to field
iteratively (on GPU)
Graphics Integration
array u = zeros(I1.dims()), v = zeros(I1.dims());
while (true) {
iter++;
array u_ = convolve(u, avg_kernel, afConvSame);
array v_ = convolve(v, avg_kernel, afConvSame);
const float alphasq = 0.1f;
array num = mul(Ix, u_) + mul(Iy, v_) + It;
array den = alphasq + mul(Ix, Ix) + mul(Iy, Iy);
array rsc = 0.01f * num;
u = u_ - mul(Ix, rsc) / den;
v = v_ - mul(Iy, rsc) / den;
subfigure(2,2,1);
subfigure(2,2,3);
subfigure(2,2,2);
subfigure(2,2,4);
}
imgplot(I1);
imgplot(I2);
imgplot(u);
imgplot(v);
DEMO
Create figures, and draw
ArrayFire + Other GPU Code
Thank You