StarPU Handbook
|
Data Structures | |
struct | starpu_data_filter |
Basic API | |
void | starpu_data_partition (starpu_data_handle_t initial_handle, struct starpu_data_filter *f) |
void | starpu_data_unpartition (starpu_data_handle_t root_data, unsigned gathering_node) |
int | starpu_data_get_nb_children (starpu_data_handle_t handle) |
starpu_data_handle_t | starpu_data_get_child (starpu_data_handle_t handle, unsigned i) |
starpu_data_handle_t | starpu_data_get_sub_data (starpu_data_handle_t root_data, unsigned depth,...) |
starpu_data_handle_t | starpu_data_vget_sub_data (starpu_data_handle_t root_data, unsigned depth, va_list pa) |
void | starpu_data_map_filters (starpu_data_handle_t root_data, unsigned nfilters,...) |
void | starpu_data_vmap_filters (starpu_data_handle_t root_data, unsigned nfilters, va_list pa) |
Asynchronous API | |
void | starpu_data_partition_plan (starpu_data_handle_t initial_handle, struct starpu_data_filter *f, starpu_data_handle_t *children) |
void | starpu_data_partition_submit (starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children) |
void | starpu_data_partition_readonly_submit (starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children) |
void | starpu_data_partition_readwrite_upgrade_submit (starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children) |
void | starpu_data_unpartition_submit (starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node) |
void | starpu_data_unpartition_readonly_submit (starpu_data_handle_t initial_handle, unsigned nparts, starpu_data_handle_t *children, int gathering_node) |
void | starpu_data_partition_clean (starpu_data_handle_t root_data, unsigned nparts, starpu_data_handle_t *children) |
Predefined Vector Filter Functions | |
This section gives a partial list of the predefined partitioning functions for vector data. Examples of how to use them are shown in Partitioning Data. The complete list can be found in the file starpu_data_filters.h. | |
void | starpu_vector_filter_block (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts) |
void | starpu_vector_filter_block_shadow (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts) |
void | starpu_vector_filter_list_long (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts) |
void | starpu_vector_filter_list (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts) |
void | starpu_vector_filter_divide_in_2 (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts) |
Predefined Matrix Filter Functions | |
This section gives a partial list of the predefined partitioning functions for matrix data. Examples of how to use them are shown in Partitioning Data. The complete list can be found in the file starpu_data_filters.h. | |
void | starpu_matrix_filter_block (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts) |
void | starpu_matrix_filter_block_shadow (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts) |
void | starpu_matrix_filter_vertical_block (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts) |
void | starpu_matrix_filter_vertical_block_shadow (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts) |
Predefined Block Filter Functions | |
This section gives a partial list of the predefined partitioning functions for block data. Examples of how to use them are shown in Partitioning Data. The complete list can be found in the file starpu_data_filters.h. | |
void | starpu_block_filter_block (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts) |
void | starpu_block_filter_block_shadow (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts) |
void | starpu_block_filter_vertical_block (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts) |
void | starpu_block_filter_vertical_block_shadow (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts) |
void | starpu_block_filter_depth_block (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts) |
void | starpu_block_filter_depth_block_shadow (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts) |
Predefined BCSR Filter Functions | |
This section gives a partial list of the predefined partitioning functions for BCSR data. Examples of how to use them are shown in Partitioning Data. The complete list can be found in the file starpu_data_filters.h. | |
void | starpu_bcsr_filter_canonical_block (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts) |
void | starpu_csr_filter_vertical_block (void *father_interface, void *child_interface, struct starpu_data_filter *f, unsigned id, unsigned nparts) |
struct starpu_data_filter |
The filter structure describes a data partitioning operation, to be given to the starpu_data_partition() function.
Data Fields | |
void(* | filter_func )(void *father_interface, void *child_interface, struct starpu_data_filter *, unsigned id, unsigned nparts) |
unsigned | nchildren |
unsigned(* | get_nchildren )(struct starpu_data_filter *, starpu_data_handle_t initial_handle) |
struct starpu_data_interface_ops *(* | get_child_ops )(struct starpu_data_filter *, unsigned id) |
unsigned | filter_arg |
void * | filter_arg_ptr |
void(* starpu_data_filter::filter_func)(void *father_interface, void *child_interface, struct starpu_data_filter *filter, unsigned i, unsigned nparts) |
Fill the child_interface
structure with interface information for the i
-th child of the parent father_interface
(among nparts
). The filter
structure is provided so that the filter function can inspect the starpu_data_filter::filter_arg and starpu_data_filter::filter_arg_ptr parameters.
The details of what needs to be filled in child_interface
vary according to the data interface, but generally speaking:
- id is usually just copied over from the father, when the sub data has the same structure as the father, e.g. a subvector is a vector, a submatrix is a matrix, etc. This is however not the case for instance when dividing a BCSR matrix into its dense blocks, which then are matrices.
- nx, ny and alike are usually divided by the number of subdata, depending on how the subdivision is done (e.g. nx division vs ny division for vertical matrix division vs horizontal matrix division).
- ld for matrix interfaces is usually just copied over: the leading dimension (ld) usually does not change.
- elemsize is usually just copied over.
- ptr, the pointer to the data, has to be computed according to i and the father's ptr, so as to point to the start of the sub data. This should however be done only if the father has ptr different from NULL: in the OpenCL case notably, the dev_handle and offset fields are used instead.
- dev_handle should be just copied over from the parent.
- offset has to be computed according to i and the father's offset, so as to provide the offset of the start of the sub data. This is notably used for the OpenCL case.
unsigned starpu_data_filter::nchildren |
This is the number of parts to partition the data into.
unsigned(* starpu_data_filter::get_nchildren)(struct starpu_data_filter *, starpu_data_handle_t initial_handle) |
This returns the number of children. This can be used instead of nchildren when the number of children depends on the actual data (e.g. the number of blocks in a sparse matrix).
struct starpu_data_interface_ops *(* starpu_data_filter::get_child_ops)(struct starpu_data_filter *, unsigned id) |
In case the resulting children use a different data interface, this function returns which interface is used by child number id.
unsigned starpu_data_filter::filter_arg |
Additional parameter for the filter function.
void * starpu_data_filter::filter_arg_ptr |
Additional pointer parameter for the filter function, such as the sizes of the different parts.
void starpu_data_partition | ( | starpu_data_handle_t | initial_handle, |
struct starpu_data_filter * | f | ||
) |
This requests partitioning one StarPU data initial_handle into several subdata according to the filter f
.
Here is an example of how to use the function.
void starpu_data_unpartition | ( | starpu_data_handle_t | root_data, |
unsigned | gathering_node | ||
) |
This unapplies one filter, thus unpartitioning the data. The pieces of data are collected back into one big piece in the gathering_node
(usually STARPU_MAIN_RAM). Tasks working on the partitioned data must be already finished when calling starpu_data_unpartition().
Here is an example of how to use the function.
int starpu_data_get_nb_children | ( | starpu_data_handle_t | handle | ) |
This function returns the number of children.
starpu_data_handle_t starpu_data_get_child | ( | starpu_data_handle_t | handle, |
unsigned | i | ||
) |
Return the ith child of the given handle
, which must have been partitioned beforehand.
starpu_data_handle_t starpu_data_get_sub_data | ( | starpu_data_handle_t | root_data, |
unsigned | depth, | ||
... | |||
) |
After partitioning a StarPU data by applying a filter, starpu_data_get_sub_data() can be used to get handles for each of the data portions. root_data
is the parent data that was partitioned. depth
is the number of filters to traverse (in case several filters have been applied, to e.g. partition in row blocks, and then in column blocks), and the subsequent parameters are the indexes. The function returns a handle to the subdata.
Here is an example of how to use the function.
starpu_data_handle_t starpu_data_vget_sub_data | ( | starpu_data_handle_t | root_data, |
unsigned | depth, | ||
va_list | pa | ||
) |
This function is similar to starpu_data_get_sub_data() but uses a va_list for the parameter list.
void starpu_data_map_filters | ( | starpu_data_handle_t | root_data, |
unsigned | nfilters, | ||
... | |||
) |
Applies nfilters
filters to the handle designated by root_data
recursively. nfilters
pointers to variables of the type starpu_data_filter should be given.
void starpu_data_vmap_filters | ( | starpu_data_handle_t | root_data, |
unsigned | nfilters, | ||
va_list | pa | ||
) |
Applies nfilters
filters to the handle designated by root_data
recursively. It uses a va_list of pointers to variables of the type starpu_data_filter.
void starpu_data_partition_plan | ( | starpu_data_handle_t | initial_handle, |
struct starpu_data_filter * | f, | ||
starpu_data_handle_t * | children | ||
) |
This plans for partitioning one StarPU data handle initial_handle
into several subdata according to the filter f
. The handles are returned into the children
array, which has to be the same size as the number of parts described in f
. These handles are not immediately usable: starpu_data_partition_submit has to be called to submit the actual partitioning.
Here is an example of how to use the function:
void starpu_data_partition_submit | ( | starpu_data_handle_t | initial_handle, |
unsigned | nparts, | ||
starpu_data_handle_t * | children | ||
) |
This submits the actual partitioning of initial_handle
into the nparts
children
handles. This call is asynchronous: it only submits that the partitioning should be done, so that the children
handles can now be used to submit tasks, and initial_handle
can not be used to submit tasks any more (to guarantee coherency).
For instance,
void starpu_data_partition_readonly_submit | ( | starpu_data_handle_t | initial_handle, |
unsigned | nparts, | ||
starpu_data_handle_t * | children | ||
) |
This is the same as starpu_data_partition_submit, but does not invalidate initial_handle
. This allows the application to continue using it, but it has to be careful not to write to initial_handle
or children
handles, only read from them, since the coherency is otherwise not guaranteed. This thus makes it possible to submit various tasks which concurrently read from various partitions of the data.
When the application wants to write to initial_handle
again, it should call starpu_data_unpartition_submit, which will properly add dependencies between the reads on the children
and the writes to be submitted.
If instead the application wants to write to children
handles, it should call starpu_data_partition_readwrite_upgrade_submit, which will properly add dependencies between the reads on the initial_handle
and the writes to be submitted.
void starpu_data_partition_readwrite_upgrade_submit | ( | starpu_data_handle_t | initial_handle, |
unsigned | nparts, | ||
starpu_data_handle_t * | children | ||
) |
This assumes that a partitioning of initial_handle
has already been submitted in readonly mode through starpu_data_partition_readonly_submit, and will upgrade that partitioning into read-write mode for the children
, by invalidating initial_handle
, and adding the necessary dependencies.
void starpu_data_unpartition_submit | ( | starpu_data_handle_t | initial_handle, |
unsigned | nparts, | ||
starpu_data_handle_t * | children, | ||
int | gathering_node | ||
) |
This assumes that initial_handle
is partitioned into children
, and submits an unpartitioning of it, i.e. submitting a gathering of the pieces on the requested gathering_node
memory node, and submitting an invalidation of the children.
gathering_node
can be set to -1 to let the runtime decide which memory node should be used to gather the pieces.
This call is asynchronous: it only submits that the unpartitioning should be done, so that the children
handles should not be used to submit tasks any more, and initial_handle
can now be used again to submit tasks.
void starpu_data_unpartition_readonly_submit | ( | starpu_data_handle_t | initial_handle, |
unsigned | nparts, | ||
starpu_data_handle_t * | children, | ||
int | gathering_node | ||
) |
This assumes that initial_handle
is partitioned into children
, and submits just a readonly unpartitioning of it, i.e. submitting a gathering of the pieces on the requested gathering_node
memory node. It does not invalidate the children. This brings initial_handle
and children
handles to the same state as obtained with starpu_data_partition_readonly_submit.
gathering_node
can be set to -1 to let the runtime decide which memory node should be used to gather the pieces.
void starpu_data_partition_clean | ( | starpu_data_handle_t | root_data, |
unsigned | nparts, | ||
starpu_data_handle_t * | children | ||
) |
This should be used to clear the partition planning established between root_data
and children
with starpu_data_partition_plan. This will notably submit an unregistration of all the children
, which can thus not be used any more afterwards.
void starpu_vector_filter_block | ( | void * | father_interface, |
void * | child_interface, | ||
struct starpu_data_filter * | f, | ||
unsigned | id, | ||
unsigned | nparts | ||
) |
Return in child_interface
the id
th element of the vector represented by father_interface
once partitioned in nparts
chunks of equal size.
void starpu_vector_filter_block_shadow | ( | void * | father_interface, |
void * | child_interface, | ||
struct starpu_data_filter * | f, | ||
unsigned | id, | ||
unsigned | nparts | ||
) |
Return in child_interface
the id
th element of the vector represented by father_interface
once partitioned in nparts
chunks of equal size with a shadow border filter_arg_ptr
, thus getting a vector of size (n-2*shadow)/nparts+2*shadow. The filter_arg_ptr
field of f
must be the shadow size cast to void*. IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts. A usage example is available in examples/filters/shadow.c
void starpu_vector_filter_list_long | ( | void * | father_interface, |
void * | child_interface, | ||
struct starpu_data_filter * | f, | ||
unsigned | id, | ||
unsigned | nparts | ||
) |
Return in child_interface
the id
th element of the vector represented by father_interface
once partitioned into nparts
chunks according to the filter_arg_ptr
field of f
. The filter_arg_ptr
field must point to an array of nparts
long elements, each of which specifies the number of elements in each chunk of the partition.
void starpu_vector_filter_list | ( | void * | father_interface, |
void * | child_interface, | ||
struct starpu_data_filter * | f, | ||
unsigned | id, | ||
unsigned | nparts | ||
) |
Return in child_interface
the id
th element of the vector represented by father_interface
once partitioned into nparts
chunks according to the filter_arg_ptr
field of f
. The filter_arg_ptr
field must point to an array of nparts
uint32_t elements, each of which specifies the number of elements in each chunk of the partition.
void starpu_vector_filter_divide_in_2 | ( | void * | father_interface, |
void * | child_interface, | ||
struct starpu_data_filter * | f, | ||
unsigned | id, | ||
unsigned | nparts | ||
) |
Return in child_interface
the id
th element of the vector represented by father_interface
once partitioned in 2
chunks of equal size, ignoring nparts. Thus, id
must be 0
or 1
.
void starpu_matrix_filter_block | ( | void * | father_interface, |
void * | child_interface, | ||
struct starpu_data_filter * | f, | ||
unsigned | id, | ||
unsigned | nparts | ||
) |
This partitions a dense Matrix along the x dimension, thus getting (x/nparts
,y) matrices. If nparts
does not divide x, the last submatrix contains the remainder.
void starpu_matrix_filter_block_shadow | ( | void * | father_interface, |
void * | child_interface, | ||
struct starpu_data_filter * | f, | ||
unsigned | id, | ||
unsigned | nparts | ||
) |
This partitions a dense Matrix along the x dimension, with a shadow border filter_arg_ptr
, thus getting ((x-2*shadow)/nparts
+2*shadow,y) matrices. If nparts
does not divide x-2*shadow, the last submatrix contains the remainder. IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts. A usage example is available in examples/filters/shadow2d.c
void starpu_matrix_filter_vertical_block | ( | void * | father_interface, |
void * | child_interface, | ||
struct starpu_data_filter * | f, | ||
unsigned | id, | ||
unsigned | nparts | ||
) |
This partitions a dense Matrix along the y dimension, thus getting (x,y/nparts
) matrices. If nparts
does not divide y, the last submatrix contains the remainder.
void starpu_matrix_filter_vertical_block_shadow | ( | void * | father_interface, |
void * | child_interface, | ||
struct starpu_data_filter * | f, | ||
unsigned | id, | ||
unsigned | nparts | ||
) |
This partitions a dense Matrix along the y dimension, with a shadow border filter_arg_ptr
, thus getting (x,(y-2*shadow)/nparts
+2*shadow) matrices. If nparts
does not divide y-2*shadow, the last submatrix contains the remainder. IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts. A usage example is available in examples/filters/shadow2d.c
void starpu_block_filter_block | ( | void * | father_interface, |
void * | child_interface, | ||
struct starpu_data_filter * | f, | ||
unsigned | id, | ||
unsigned | nparts | ||
) |
This partitions a block along the X dimension, thus getting (x/nparts
,y,z) 3D matrices. If nparts
does not divide x, the last submatrix contains the remainder.
void starpu_block_filter_block_shadow | ( | void * | father_interface, |
void * | child_interface, | ||
struct starpu_data_filter * | f, | ||
unsigned | id, | ||
unsigned | nparts | ||
) |
This partitions a block along the X dimension, with a shadow border filter_arg_ptr
, thus getting ((x-2*shadow)/nparts
+2*shadow,y,z) blocks. If nparts
does not divide x-2*shadow, the last submatrix contains the remainder. IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts.
void starpu_block_filter_vertical_block | ( | void * | father_interface, |
void * | child_interface, | ||
struct starpu_data_filter * | f, | ||
unsigned | id, | ||
unsigned | nparts | ||
) |
This partitions a block along the Y dimension, thus getting (x,y/nparts
,z) blocks. If nparts
does not divide y, the last submatrix contains the remainder.
void starpu_block_filter_vertical_block_shadow | ( | void * | father_interface, |
void * | child_interface, | ||
struct starpu_data_filter * | f, | ||
unsigned | id, | ||
unsigned | nparts | ||
) |
This partitions a block along the Y dimension, with a shadow border filter_arg_ptr
, thus getting (x,(y-2*shadow)/nparts
+2*shadow,z) 3D matrices. If nparts
does not divide y-2*shadow, the last submatrix contains the remainder. IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts.
void starpu_block_filter_depth_block | ( | void * | father_interface, |
void * | child_interface, | ||
struct starpu_data_filter * | f, | ||
unsigned | id, | ||
unsigned | nparts | ||
) |
This partitions a block along the Z dimension, thus getting (x,y,z/nparts
) blocks. If nparts
does not divide z, the last submatrix contains the remainder.
void starpu_block_filter_depth_block_shadow | ( | void * | father_interface, |
void * | child_interface, | ||
struct starpu_data_filter * | f, | ||
unsigned | id, | ||
unsigned | nparts | ||
) |
This partitions a block along the Z dimension, with a shadow border filter_arg_ptr
, thus getting (x,y,(z-2*shadow)/nparts
+2*shadow) blocks. If nparts
does not divide z-2*shadow, the last submatrix contains the remainder. IMPORTANT: This can only be used for read-only access, as no coherency is enforced for the shadowed parts.
void starpu_bcsr_filter_canonical_block | ( | void * | father_interface, |
void * | child_interface, | ||
struct starpu_data_filter * | f, | ||
unsigned | id, | ||
unsigned | nparts | ||
) |
This partitions a block-sparse matrix into dense matrices.
void starpu_csr_filter_vertical_block | ( | void * | father_interface, |
void * | child_interface, | ||
struct starpu_data_filter * | f, | ||
unsigned | id, | ||
unsigned | nparts | ||
) |
This partitions a block-sparse matrix into vertical block-sparse matrices.