StarPU Handbook - StarPU Performances
|
Data Structures | |
struct | starpu_perfmodel_device |
struct | starpu_perfmodel_arch |
struct | starpu_perfmodel_history_entry |
struct | starpu_perfmodel_history_list |
struct | starpu_perfmodel_regression_model |
struct | starpu_perfmodel_per_arch |
struct | starpu_perfmodel |
Macros | |
#define | starpu_per_arch_perfmodel |
Typedefs | |
typedef double(* | starpu_perfmodel_per_arch_cost_function) (struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl) |
typedef size_t(* | starpu_perfmodel_per_arch_size_base) (struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned nimpl) |
typedef struct _starpu_perfmodel_state * | starpu_perfmodel_state_t |
Enumerations | |
enum | starpu_perfmodel_type { STARPU_PERFMODEL_INVALID , STARPU_PER_WORKER , STARPU_PER_ARCH , STARPU_COMMON , STARPU_HISTORY_BASED , STARPU_REGRESSION_BASED , STARPU_NL_REGRESSION_BASED , STARPU_MULTIPLE_REGRESSION_BASED } |
Functions | |
void | starpu_perfmodel_init (struct starpu_perfmodel *model) |
int | starpu_perfmodel_deinit (struct starpu_perfmodel *model) |
int | starpu_energy_start (int workerid, enum starpu_worker_archtype archi) |
int | starpu_energy_stop (struct starpu_perfmodel *model, struct starpu_task *task, unsigned nimpl, unsigned ntasks, int workerid, enum starpu_worker_archtype archi) |
int | starpu_perfmodel_load_file (const char *filename, struct starpu_perfmodel *model) |
int | starpu_perfmodel_load_symbol (const char *symbol, struct starpu_perfmodel *model) |
int | starpu_perfmodel_unload_model (struct starpu_perfmodel *model) |
void | starpu_save_history_based_model (struct starpu_perfmodel *model) |
void | starpu_perfmodel_get_model_path (const char *symbol, char *path, size_t maxlen) |
void | starpu_perfmodel_dump_xml (FILE *output, struct starpu_perfmodel *model) |
void | starpu_perfmodel_free_sampling (void) |
struct starpu_perfmodel_arch * | starpu_worker_get_perf_archtype (int workerid, unsigned sched_ctx_id) |
int | starpu_perfmodel_get_narch_combs (void) |
int | starpu_perfmodel_arch_comb_add (int ndevices, struct starpu_perfmodel_device *devices) |
int | starpu_perfmodel_arch_comb_get (int ndevices, struct starpu_perfmodel_device *devices) |
struct starpu_perfmodel_arch * | starpu_perfmodel_arch_comb_fetch (int comb) |
struct starpu_perfmodel_per_arch * | starpu_perfmodel_get_model_per_arch (struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned impl) |
struct starpu_perfmodel_per_arch * | starpu_perfmodel_get_model_per_devices (struct starpu_perfmodel *model, int impl,...) |
int | starpu_perfmodel_set_per_devices_cost_function (struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_cost_function func,...) |
int | starpu_perfmodel_set_per_devices_size_base (struct starpu_perfmodel *model, int impl, starpu_perfmodel_per_arch_size_base func,...) |
void | starpu_perfmodel_debugfilepath (struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, char *path, size_t maxlen, unsigned nimpl) |
const char * | starpu_perfmodel_get_archtype_name (enum starpu_worker_archtype archtype) |
void | starpu_perfmodel_get_arch_name (struct starpu_perfmodel_arch *arch, char *archname, size_t maxlen, unsigned nimpl) |
double | starpu_perfmodel_history_based_expected_perf (struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, uint32_t footprint) |
void | starpu_perfmodel_initialize (void) |
int | starpu_perfmodel_list (FILE *output) |
void | starpu_perfmodel_print (struct starpu_perfmodel *model, struct starpu_perfmodel_arch *arch, unsigned nimpl, char *parameter, uint32_t *footprint, FILE *output) |
int | starpu_perfmodel_print_all (struct starpu_perfmodel *model, char *arch, char *parameter, uint32_t *footprint, FILE *output) |
int | starpu_perfmodel_print_estimations (struct starpu_perfmodel *model, uint32_t footprint, FILE *output) |
int | starpu_perfmodel_list_combs (FILE *output, struct starpu_perfmodel *model) |
void | starpu_perfmodel_update_history (struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double measured) |
void | starpu_perfmodel_update_history_n (struct starpu_perfmodel *model, struct starpu_task *task, struct starpu_perfmodel_arch *arch, unsigned cpuid, unsigned nimpl, double average_measured, unsigned number) |
void | starpu_perfmodel_directory (FILE *output) |
void | starpu_bus_print_bandwidth (FILE *f) |
void | starpu_bus_print_affinity (FILE *f) |
void | starpu_bus_print_filenames (FILE *f) |
double | starpu_transfer_bandwidth (unsigned src_node, unsigned dst_node) |
double | starpu_transfer_latency (unsigned src_node, unsigned dst_node) |
double | starpu_transfer_predict (unsigned src_node, unsigned dst_node, size_t size) |
Variables | |
struct starpu_perfmodel | starpu_perfmodel_nop |
struct starpu_perfmodel_device |
todo
Data Fields | ||
---|---|---|
enum starpu_worker_archtype | type |
type of the device |
int | devid |
identifier of the precise device |
int | ncores |
number of executions in parallel, minus 1 |
struct starpu_perfmodel_arch |
todo
Data Fields | ||
---|---|---|
int | ndevices |
number of the devices for the given arch |
struct starpu_perfmodel_device * | devices |
list of the devices for the given arch |
struct starpu_perfmodel_history_entry |
Data Fields | ||
---|---|---|
double | mean |
mean_n = 1/n sum |
double | deviation |
n dev_n = sum2 - 1/n (sum)^2 |
double | sum |
sum of samples (in µs) |
double | sum2 |
sum of samples^2 |
unsigned | nsample |
number of samples |
unsigned | nerror | |
uint32_t | footprint |
data footprint |
size_t | size |
in bytes |
double | flops |
Provided by the application |
double | duration | |
starpu_tag_t | tag | |
double * | parameters |
struct starpu_perfmodel_history_list |
Data Fields | ||
---|---|---|
struct starpu_perfmodel_history_list * | next | |
struct starpu_perfmodel_history_entry * | entry |
struct starpu_perfmodel_regression_model |
todo
struct starpu_perfmodel_per_arch |
information about the performance model of a given arch.
Data Fields | |
starpu_perfmodel_per_arch_cost_function | cost_function |
starpu_perfmodel_per_arch_size_base | size_base |
char | debug_path [256] |
Private Attributes | |
struct starpu_perfmodel_history_table * | history |
struct starpu_perfmodel_history_list * | list |
struct starpu_perfmodel_regression_model | regression |
starpu_perfmodel_per_arch_cost_function starpu_perfmodel_per_arch::cost_function |
Used by STARPU_PER_ARCH. Must point to a function which takes a task, the target arch and the implementation number (as a mere convenience, since the array is already indexed by these), and returns a task duration estimation in micro-seconds.
starpu_perfmodel_per_arch_size_base starpu_perfmodel_per_arch::size_base |
Same as in structure starpu_perfmodel, but per-arch, in case it depends on the architecture-specific implementation.
|
private |
The history of performance measurements.
|
private |
Used by STARPU_HISTORY_BASED, STARPU_NL_REGRESSION_BASED and STARPU_MULTIPLE_REGRESSION_BASED, records all execution history measures.
|
private |
Used by STARPU_REGRESSION_BASED, STARPU_NL_REGRESSION_BASED and STARPU_MULTIPLE_REGRESSION_BASED, contains the estimated factors of the regression.
struct starpu_perfmodel |
Contains all the information about a performance model. At least the type and symbol fields have to be filled when defining a performance model for a codelet. For compatibility, make sure to initialize the whole structure to zero, either by using an explicit memset, or by letting the compiler do it implicitly, e.g. for a structure with static storage. If not provided, other fields have to be zero.
Data Fields | |
enum starpu_perfmodel_type | type |
double(* | cost_function )(struct starpu_task *, unsigned nimpl) |
double(* | arch_cost_function )(struct starpu_task *, struct starpu_perfmodel_arch *arch, unsigned nimpl) |
double(* | worker_cost_function )(struct starpu_task *, unsigned workerid, unsigned nimpl) |
size_t(* | size_base )(struct starpu_task *, unsigned nimpl) |
uint32_t(* | footprint )(struct starpu_task *) |
const char * | symbol |
char * | path |
void(* | parameters )(struct starpu_task *task, double *parameters) |
Private Attributes | |
unsigned | is_loaded |
unsigned | benchmarking |
unsigned | is_init |
const char ** | parameters_names |
unsigned | nparameters |
unsigned ** | combinations |
unsigned | ncombinations |
starpu_perfmodel_state_t | state |
enum starpu_perfmodel_type starpu_perfmodel::type |
type of performance model
double(* starpu_perfmodel::cost_function) (struct starpu_task *, unsigned nimpl) |
Used by STARPU_COMMON. Take a task and implementation number, and must return a task duration estimation in micro-seconds.
double(* starpu_perfmodel::arch_cost_function) (struct starpu_task *, struct starpu_perfmodel_arch *arch, unsigned nimpl) |
Used by STARPU_PER_ARCH. Take a task, an arch and implementation number, and must return a task duration estimation in micro-seconds on that arch.
double(* starpu_perfmodel::worker_cost_function) (struct starpu_task *, unsigned workerid, unsigned nimpl) |
Used by STARPU_PER_WORKER. Take a task, a worker id and implementation number, and must return a task duration estimation in micro-seconds on that worker.
size_t(* starpu_perfmodel::size_base) (struct starpu_task *, unsigned nimpl) |
Used by STARPU_HISTORY_BASED, STARPU_REGRESSION_BASED and STARPU_NL_REGRESSION_BASED. If not `NULL`, takes a task and implementation number, and returns the size to be used as index to distinguish histories and as a base for regressions.
uint32_t(* starpu_perfmodel::footprint) (struct starpu_task *) |
Used by STARPU_HISTORY_BASED. If not `NULL`, takes a task and returns the footprint to be used as index to distinguish histories. The default is to use the starpu_task_data_footprint() function.
const char* starpu_perfmodel::symbol |
symbol name for the performance model, which will be used as file name to store the model. It must be set otherwise the model will be ignored.
char* starpu_perfmodel::path |
name of the file storing the performance model. It is non NULL if the model has been loaded or stored in a file.
|
private |
Whether the performance model is already loaded from the disk.
|
private |
Names of parameters used for multiple linear regression models (M, N, K)
|
private |
Number of parameters used for multiple linear regression models
|
private |
Table of combinations of parameters (and the exponents) used for multiple linear regression models
|
private |
Number of combinations of parameters used for multiple linear regression models
todo
void starpu_perfmodel_init | ( | struct starpu_perfmodel * | model | ) |
Initialize the `model` performance model structure. This is automatically called, e.g. when submitting a task using a codelet which uses this performance model.
int starpu_perfmodel_deinit | ( | struct starpu_perfmodel * | model | ) |
Deinitialize the `model` performance model structure. You need to call this before deallocating the structure. You will probably want to call starpu_perfmodel_unload_model() before calling this function, to save the perfmodel.
int starpu_energy_start | ( | int | workerid, |
enum starpu_worker_archtype | archi | ||
) |
starpu_energy_start - start counting hardware events in an event set
- `workerid` is the worker on which calibration is to be performed (in the case of GPUs, use -1 for CPUs)
- `archi` is the type of architecture on which calibration will be run
See MeasuringEnergyandPower for more details.
int starpu_energy_stop | ( | struct starpu_perfmodel * | model, |
struct starpu_task * | task, | ||
unsigned | nimpl, | ||
unsigned | ntasks, | ||
int | workerid, | ||
enum starpu_worker_archtype | archi | ||
) |
starpu_energy_stop - stop counting hardware events in an event set
- `model` is the energy performance model to be filled with the result
- `task` is a task specimen, so the performance model folds the result according to the parameter sizes of the task.
- `nimpl` is the implementation number run during calibration
- `ntasks` is the number of tasks run during calibration
- `workerid` is the worker on which calibration was performed (in the case of GPUs, use -1 for CPUs)
- `archi` is the type of architecture on which calibration was run
See MeasuringEnergyandPower for more details.
int starpu_perfmodel_load_file | ( | const char * | filename, |
struct starpu_perfmodel * | model | ||
) |
Load the performance model found in the file named `filename`. `model` has to be completely zero, and will be filled with the information stored in the given file.
int starpu_perfmodel_load_symbol | ( | const char * | symbol, |
struct starpu_perfmodel * | model | ||
) |
Load a given performance model. `model` has to be completely zero, and will be filled with the information stored in `$STARPU_HOME/.starpu`. The function is intended to be used by external tools that want to read the performance model files.
int starpu_perfmodel_unload_model | ( | struct starpu_perfmodel * | model | ) |
Unload `model`, which has been previously loaded through the function starpu_perfmodel_load_symbol().
void starpu_save_history_based_model | ( | struct starpu_perfmodel * | model | ) |
Save the performance model in its file.
void starpu_perfmodel_get_model_path | ( | const char * | symbol, |
char * | path, | ||
size_t | maxlen | ||
) |
Fill `path` (supposed to be `maxlen` long) with the full path to the performance model file for symbol `symbol`. This path can later on be used for instance with starpu_perfmodel_load_file().
void starpu_perfmodel_dump_xml | ( | FILE * | output, |
struct starpu_perfmodel * | model | ||
) |
Dump performance model `model` to output stream `output`, in XML format. See Performance Model Example for more details.
void starpu_perfmodel_free_sampling | ( | void | ) |
Free internal memory used for sampling management. It should only be called by an application which does not call starpu_shutdown(), since starpu_shutdown() already calls it. See for example `tools/starpu_perfmodel_display.c`.
struct starpu_perfmodel_arch * starpu_worker_get_perf_archtype | ( | int | workerid, |
unsigned | sched_ctx_id | ||
) |
Return the architecture type of the worker `workerid`.
void starpu_perfmodel_debugfilepath | ( | struct starpu_perfmodel * | model, |
struct starpu_perfmodel_arch * | arch, | ||
char * | path, | ||
size_t | maxlen, | ||
unsigned | nimpl | ||
) |
Return the path to the debugging information for the performance model.
void starpu_perfmodel_get_arch_name | ( | struct starpu_perfmodel_arch * | arch, |
char * | archname, | ||
size_t | maxlen, | ||
unsigned | nimpl | ||
) |
Return the architecture name for arch
double starpu_perfmodel_history_based_expected_perf | ( | struct starpu_perfmodel * | model, |
struct starpu_perfmodel_arch * | arch, | ||
uint32_t | footprint | ||
) |
Return the estimated time in µs of a task with the given model and the given footprint.
void starpu_perfmodel_initialize | ( | void | ) |
If starpu_init() is not used, starpu_perfmodel_initialize() should be called before calling starpu_perfmodel_* functions.
int starpu_perfmodel_list | ( | FILE * | output | ) |
Print a list of all performance models on output
void starpu_perfmodel_update_history | ( | struct starpu_perfmodel * | model, |
struct starpu_task * | task, | ||
struct starpu_perfmodel_arch * | arch, | ||
unsigned | cpuid, | ||
unsigned | nimpl, | ||
double | measured | ||
) |
Feed the performance model model
with one explicit measurement (in µs or J), in addition to measurements done by StarPU itself. This can be useful when the application already has an existing set of measurements done in good conditions, that StarPU could benefit from instead of doing on-line measurements. An example of use can be seen in Performance Model Example.
Note that this records only one measurement, and StarPU would ignore the first measurement (since it is usually disturbed by library loading etc.). Make sure to call this function several times to record all your measurements.
You can also call starpu_perfmodel_update_history_n() to directly provide an average performed on several tasks.
See PerformanceModelCalibration for more details.
void starpu_perfmodel_update_history_n | ( | struct starpu_perfmodel * | model, |
struct starpu_task * | task, | ||
struct starpu_perfmodel_arch * | arch, | ||
unsigned | cpuid, | ||
unsigned | nimpl, | ||
double | average_measured, | ||
unsigned | number | ||
) |
Feed the performance model model
with an explicit average measurement (in µs or J).
This is similar to starpu_perfmodel_update_history(), but records a batch of `number` measurements, provided as their average `average_measured`.
void starpu_perfmodel_directory | ( | FILE * | output | ) |
Print the directory name storing performance models on output
void starpu_bus_print_bandwidth | ( | FILE * | f | ) |
Print a matrix of bus bandwidths on `f`.
void starpu_bus_print_affinity | ( | FILE * | f | ) |
Print the affinity devices on `f`.
void starpu_bus_print_filenames | ( | FILE * | f | ) |
Print on `f` the names of the files containing the matrix of bus bandwidths, the affinity devices and the latency.
double starpu_transfer_bandwidth | ( | unsigned | src_node, |
unsigned | dst_node | ||
) |
Return the bandwidth of data transfer between two memory nodes. See SchedulingHelpers for more details.
double starpu_transfer_latency | ( | unsigned | src_node, |
unsigned | dst_node | ||
) |
Return the latency of data transfer between two memory nodes. See SchedulingHelpers for more details.
double starpu_transfer_predict | ( | unsigned | src_node, |
unsigned | dst_node, | ||
size_t | size | ||
) |
Return the estimated time to transfer a given size between two memory nodes. See SchedulingHelpers for more details.
|
extern |
Performance model which always returns 1µs.