Domain, Halo and Padding regions

In this tutorial we will learn about data regions and how these impact the Operator construction. We will use a simple time marching example.

from devito import Eq, Grid, TimeFunction, Operator

grid = Grid(shape=(3, 3))
u = TimeFunction(name='u', grid=grid)
u.data[:] = 1

At this point, we have a time-varying 3x3 grid filled with 1’s. Below, we can see the domain data values:

print(u.data)

[[[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]]

We now create a time-marching Operator that, at each timestep, increments by 2 all points in the computational domain.

from devito import configuration
configuration['language'] = 'C'

eq = Eq(u.forward, u+2)
op = Operator(eq, opt='noop')

We can print op to get the generated code.

print(op)

/* Devito generated code for Operator `Kernel` */

#define _POSIX_C_SOURCE 200809L
#define START(S) struct timeval start_ ## S , end_ ## S ; gettimeofday(&start_ ## S , NULL);
#define STOP(S,T) gettimeofday(&end_ ## S, NULL); T->S += (double)(end_ ## S .tv_sec-start_ ## S.tv_sec)+(double)(end_ ## S .tv_usec-start_ ## S .tv_usec)/1000000;

#include "stdlib.h"
#include "math.h"
#include "sys/time.h"

struct dataobj
{
  void *restrict data;
  int * size;
  unsigned long nbytes;
  unsigned long * npsize;
  unsigned long * dsize;
  int * hsize;
  int * hofs;
  int * oofs;
  void * dmap;
} ;

struct profiler
{
  double section0;
} ;


int Kernel(struct dataobj *restrict u_vec, const int time_M, const int time_m, const int x_M, const int x_m, const int y_M, const int y_m, struct profiler * timers)
{
  float (*restrict u)[u_vec->size[1]][u_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]][u_vec->size[2]]) u_vec->data;

  for (int time = time_m, t0 = (time)%(2), t1 = (time + 1)%(2); time <= time_M; time += 1, t0 = (time)%(2), t1 = (time + 1)%(2))
  {
    START(section0)
    for (int x = x_m; x <= x_M; x += 1)
    {
      for (int y = y_m; y <= y_M; y += 1)
      {
        u[t1][x + 1][y + 1] = u[t0][x + 1][y + 1] + 2;
      }
    }
    STOP(section0,timers)
  }

  return 0;
}

When we take a look at the constructed expression, u[t1][x + 1][y + 1] = u[t0][x + 1][y + 1] + 2, we see several +1 were added to the u’s spatial indices.

This is because the domain region is actually surrounded by ‘ghost’ points, which can be accessed via a stencil when iterating in proximity of the domain boundary. The ghost points define the halo region. The halo region can be accessed through the data_with_halo data accessor. As we see below, the halo points correspond to the zeros surrounding the domain region.

print(u.data_with_halo)

[[[0. 0. 0. 0. 0.]
  [0. 1. 1. 1. 0.]
  [0. 1. 1. 1. 0.]
  [0. 1. 1. 1. 0.]
  [0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0.]
  [0. 1. 1. 1. 0.]
  [0. 1. 1. 1. 0.]
  [0. 1. 1. 1. 0.]
  [0. 0. 0. 0. 0.]]]

By adding the +1 offsets, the Devito compiler ensures the array accesses are logically aligned to the equation’s physical domain. For instance, the TimeFunction u(t, x, y) used in the example above has one point on each side of the x and y halo regions; if the user writes an expression including u(t, x, y) and u(t, x + 2, y + 2), the compiler will ultimately generate u[t, x + 1, y + 1] and u[t, x + 3, y + 3]. When x = y = 0, therefore, the values u[t, 1, 1] and u[t, 3, 3] are fetched, representing the first and third points in the physical domain.

By default, the halo region has space_order points on each side of the space dimensions. Sometimes, these points may be unnecessary, or, depending on the partial differential equation being approximated, extra points may be needed.

u0 = TimeFunction(name='u0', grid=grid, space_order=0)
u0.data[:] = 1
print(u0.data_with_halo)

[[[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]
  [1. 1. 1.]]]

u2 = TimeFunction(name='u2', grid=grid, space_order=2)
u2.data[:] = 1
print(u2.data_with_halo)

[[[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 1. 1. 1. 0. 0.]
  [0. 0. 1. 1. 1. 0. 0.]
  [0. 0. 1. 1. 1. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 1. 1. 1. 0. 0.]
  [0. 0. 1. 1. 1. 0. 0.]
  [0. 0. 1. 1. 1. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]]

One can also pass a 3-tuple (o, lp, rp) instead of a single integer representing the discretization order. Here, o is the discretization order, while lp and rp indicate how many points are expected on left and right sides of a point of interest, respectivelly.

u_new = TimeFunction(name='u_new', grid=grid, space_order=(4, 3, 1))

u_new.data[:] = 1
print(u_new.data_with_halo)

[[[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 1. 1. 1. 0.]
  [0. 0. 0. 1. 1. 1. 0.]
  [0. 0. 0. 1. 1. 1. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 1. 1. 1. 0.]
  [0. 0. 0. 1. 1. 1. 0.]
  [0. 0. 0. 1. 1. 1. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]]

Let’s have a look at the generated code when using u_new.

equation = Eq(u_new.forward, u_new + 2)
op = Operator(equation, opt='noop')
print(op)

/* Devito generated code for Operator `Kernel` */

#define _POSIX_C_SOURCE 200809L
#define START(S) struct timeval start_ ## S , end_ ## S ; gettimeofday(&start_ ## S , NULL);
#define STOP(S,T) gettimeofday(&end_ ## S, NULL); T->S += (double)(end_ ## S .tv_sec-start_ ## S.tv_sec)+(double)(end_ ## S .tv_usec-start_ ## S .tv_usec)/1000000;

#include "stdlib.h"
#include "math.h"
#include "sys/time.h"

struct dataobj
{
  void *restrict data;
  int * size;
  unsigned long nbytes;
  unsigned long * npsize;
  unsigned long * dsize;
  int * hsize;
  int * hofs;
  int * oofs;
  void * dmap;
} ;

struct profiler
{
  double section0;
} ;


int Kernel(struct dataobj *restrict u_new_vec, const int time_M, const int time_m, const int x_M, const int x_m, const int y_M, const int y_m, struct profiler * timers)
{
  float (*restrict u_new)[u_new_vec->size[1]][u_new_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[u_new_vec->size[1]][u_new_vec->size[2]]) u_new_vec->data;

  for (int time = time_m, t0 = (time)%(2), t1 = (time + 1)%(2); time <= time_M; time += 1, t0 = (time)%(2), t1 = (time + 1)%(2))
  {
    START(section0)
    for (int x = x_m; x <= x_M; x += 1)
    {
      for (int y = y_m; y <= y_M; y += 1)
      {
        u_new[t1][x + 3][y + 3] = u_new[t0][x + 3][y + 3] + 2;
      }
    }
    STOP(section0,timers)
  }

  return 0;
}

And finally, let’s run it, to convince ourselves that only the domain region values will be incremented at each timestep.

#NBVAL_IGNORE_OUTPUT
op.apply(time_M=2)
print(u_new.data_with_halo)

Operator `Kernel` ran in 0.01 s

[[[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 5. 5. 5. 0.]
  [0. 0. 0. 5. 5. 5. 0.]
  [0. 0. 0. 5. 5. 5. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 7. 7. 7. 0.]
  [0. 0. 0. 7. 7. 7. 0.]
  [0. 0. 0. 7. 7. 7. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]]

The halo region, in turn, is surrounded by the padding region, which can be used for data alignment. By default, there is no padding. This can be changed by passing a suitable value to padding, as shown below:

u_pad = TimeFunction(name='u_pad', grid=grid, space_order=2, padding=(0,2,2))
u_pad._data_allocated[:] = 0
u_pad.data_with_halo[:] = 1
u_pad.data[:] = 2
equation = Eq(u_pad.forward, u_pad + 2)
op = Operator(equation, opt='noop')
print(op)

/* Devito generated code for Operator `Kernel` */

#define _POSIX_C_SOURCE 200809L
#define START(S) struct timeval start_ ## S , end_ ## S ; gettimeofday(&start_ ## S , NULL);
#define STOP(S,T) gettimeofday(&end_ ## S, NULL); T->S += (double)(end_ ## S .tv_sec-start_ ## S.tv_sec)+(double)(end_ ## S .tv_usec-start_ ## S .tv_usec)/1000000;

#include "stdlib.h"
#include "math.h"
#include "sys/time.h"

struct dataobj
{
  void *restrict data;
  int * size;
  unsigned long nbytes;
  unsigned long * npsize;
  unsigned long * dsize;
  int * hsize;
  int * hofs;
  int * oofs;
  void * dmap;
} ;

struct profiler
{
  double section0;
} ;


int Kernel(struct dataobj *restrict u_pad_vec, const int time_M, const int time_m, const int x_M, const int x_m, const int y_M, const int y_m, struct profiler * timers)
{
  float (*restrict u_pad)[u_pad_vec->size[1]][u_pad_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[u_pad_vec->size[1]][u_pad_vec->size[2]]) u_pad_vec->data;

  for (int time = time_m, t0 = (time)%(2), t1 = (time + 1)%(2); time <= time_M; time += 1, t0 = (time)%(2), t1 = (time + 1)%(2))
  {
    START(section0)
    for (int x = x_m; x <= x_M; x += 1)
    {
      for (int y = y_m; y <= y_M; y += 1)
      {
        u_pad[t1][x + 2][y + 2] = u_pad[t0][x + 2][y + 2] + 2;
      }
    }
    STOP(section0,timers)
  }

  return 0;
}

Although in practice not very useful, with the (private) _data_allocated accessor one can see the entire domain + halo + padding region.

print(u_pad._data_allocated)

[[[1. 1. 1. 1. 1. 1. 1. 0. 0.]
  [1. 1. 1. 1. 1. 1. 1. 0. 0.]
  [1. 1. 2. 2. 2. 1. 1. 0. 0.]
  [1. 1. 2. 2. 2. 1. 1. 0. 0.]
  [1. 1. 2. 2. 2. 1. 1. 0. 0.]
  [1. 1. 1. 1. 1. 1. 1. 0. 0.]
  [1. 1. 1. 1. 1. 1. 1. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0.]]

 [[1. 1. 1. 1. 1. 1. 1. 0. 0.]
  [1. 1. 1. 1. 1. 1. 1. 0. 0.]
  [1. 1. 2. 2. 2. 1. 1. 0. 0.]
  [1. 1. 2. 2. 2. 1. 1. 0. 0.]
  [1. 1. 2. 2. 2. 1. 1. 0. 0.]
  [1. 1. 1. 1. 1. 1. 1. 0. 0.]
  [1. 1. 1. 1. 1. 1. 1. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0.]]]

Above, the domain is filled with 2, the halo is filled with 1, and the padding is filled with 0.

Other Formats