Lift
Library of parallel computing primitives for GPUs and multi-core CPUs
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
for_each.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015, NVIDIA CORPORATION
3  * Copyright (c) 2015, Nuno Subtil <subtil@gmail.com>
4  * Copyright (c) 2015, Roche Molecular Systems Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  * * Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  * * Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the distribution.
14  * * Neither the name of the copyright holders nor the names of its
15  * contributors may be used to endorse or promote products derived from
16  * this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #pragma once
31 
32 #include <algorithm>
33 
34 #include "../../types.h"
35 #include "launch_parameters.h"
36 
37 namespace lift {
38 
/**
 * Kernel that applies `func` to every element input[i] for i in [0, length).
 *
 * Each thread starts at its global 1D thread id and then advances by the
 * total number of threads in the grid (blockDim.x * gridDim.x), so the whole
 * range is covered for any 1D launch configuration. Only the x dimension of
 * the grid and of the block is used for indexing.
 *
 * @tparam InputIterator  type that can be indexed with operator[] in device code
 * @tparam Function       callable invoked as func(input[index])
 * @tparam index_type     integer type of the loop index (for_each instantiates
 *                        this kernel with uint32 and uint64). The index can
 *                        grow to just under length + blockDim.x * gridDim.x
 *                        before a thread leaves the loop, so the type must be
 *                        able to represent that value.
 *
 * @param input   beginning of the range to process
 * @param length  number of elements in the range
 * @param func    function applied to each element
 */
template <typename InputIterator, typename Function, typename index_type>
__global__ void for_each_kernel(InputIterator input, size_t length, Function func)
{
    // Widen every operand to index_type *before* multiplying: the built-in
    // index variables are 32-bit unsigned, so the products would otherwise be
    // evaluated (and could wrap) in 32 bits even when index_type is 64 bits wide.
    const index_type stride = index_type(blockDim.x) * index_type(gridDim.x);

    for (index_type index = index_type(blockIdx.x) * index_type(blockDim.x) + index_type(threadIdx.x);
         index < length;
         index += stride)
    {
        func(input[index]);
    }
}
51 
/**
 * Returns true when for_each must use the 64-bit index kernel.
 *
 * The loop index in for_each_kernel can grow to just under
 * length + (number of blocks * threads per block) before a thread exits, so
 * that sum is compared against 2^31. All arithmetic is done in 64 bits.
 *
 * @param length  number of elements to process
 * @param params  launch configuration: x = number of blocks, y = threads per block
 */
inline bool for_each_needs_64bit_index(size_t length, int2 params)
{
    const uint64 total_threads = uint64(params.x) * uint64(params.y);

    // note: the shift must be performed on a 64-bit value; (1 << 31) overflows int
    return uint64(length) + total_threads >= (uint64(1) << 31);
}

/**
 * Number of blocks of `block_size` threads needed to cover `length` elements
 * (ceiling division), clamped to the largest value an int can hold.
 *
 * Clamping is safe because for_each_kernel strides over the range, so a grid
 * smaller than the element count still visits every element.
 *
 * @param length      number of elements to process
 * @param block_size  threads per block; must be greater than zero
 */
inline int for_each_block_count(size_t length, int block_size)
{
    const uint64 max_blocks = 0x7fffffff;
    const uint64 blocks = (uint64(length) + uint64(block_size) - 1) / uint64(block_size);

    return int(std::min<uint64>(blocks, max_blocks));
}

/**
 * Launches for_each_kernel with the given configuration, choosing the 32-bit
 * or 64-bit index instantiation according to `use_64bit_index`.
 *
 * @param params  launch configuration: x = number of blocks, y = threads per block
 */
template <typename InputIterator, typename Function>
void for_each_dispatch(InputIterator input, size_t length, Function func, int2 params, bool use_64bit_index)
{
    if (use_64bit_index)
    {
        for_each_kernel<InputIterator, Function, uint64> <<<params.x, params.y>>>(input, length, func);
    } else {
        for_each_kernel<InputIterator, Function, uint32> <<<params.x, params.y>>>(input, length, func);
    }
}

/**
 * Applies `func` to every element input[i], i in [0, length), by launching
 * for_each_kernel.
 *
 * The kernel is launched without an explicit stream or dynamic shared memory
 * size. This function neither synchronizes nor checks for launch errors after
 * the launch; callers that need either must do so themselves.
 *
 * @param input          beginning of the range to process
 * @param length         number of elements; when zero, nothing is launched
 * @param func           function applied to each element
 * @param launch_params  x = number of blocks, y = threads per block:
 *                       - { 0, 0 }: both values are obtained from launch_parameters()
 *                       - { 0, y }: the block count is derived from length and y
 *                       - { x, y }: used as given, except that x is reduced (with a
 *                                   warning on stderr) if it exceeds the number of
 *                                   blocks needed to cover the range
 *                       A zero y combined with a non-zero x is not supported: the
 *                       block count computation would divide by zero.
 */
template <typename InputIterator, typename Function>
void for_each(InputIterator input, size_t length, Function func, int2 launch_params = { 0, 0 })
{
    // nothing to process; also avoids launching a kernel with an empty grid
    if (length == 0)
    {
        return;
    }

    if (launch_params.x == 0 &&
        launch_params.y == 0)
    {
        // fully automatic: each kernel instantiation gets its own launch parameters
        const int2 params_64 = launch_parameters(for_each_kernel<InputIterator, Function, uint64>, length);
        const int2 params_32 = launch_parameters(for_each_kernel<InputIterator, Function, uint32>, length);

        // figure out the type of the index required
        const bool use_64bit_index = for_each_needs_64bit_index(length, params_32);

        for_each_dispatch(input, length, func, use_64bit_index ? params_64 : params_32, use_64bit_index);
        return;
    }

    const int max_blocks = for_each_block_count(length, launch_params.y);

    if (launch_params.x == 0)
    {
        // block size given by the caller, block count derived from the range length
        launch_params.x = max_blocks;
    } else if (launch_params.x > max_blocks) {
        // make sure the launch parameters are not overcommitted
        fprintf(stderr, "WARNING: for_each call overcommitted, reducing block count to %d\n", max_blocks);
        launch_params.x = max_blocks;
    }

    // figure out the type of the index required
    for_each_dispatch(input, length, func, launch_params, for_each_needs_64bit_index(length, launch_params));
}
99 
/**
 * Returns the launch parameters that for_each would compute on its own for a
 * range of `length` elements (i.e., when called with launch_params = { 0, 0 }).
 *
 * Launch parameters are queried via launch_parameters() for both the 32-bit
 * and the 64-bit index instantiation of for_each_kernel; the 64-bit set is
 * returned when length plus the total thread count of the 32-bit
 * configuration reaches 2^31, otherwise the 32-bit set is returned.
 *
 * @param input   only used to deduce InputIterator; its value is not read
 * @param length  number of elements to process
 * @param func    only used to deduce Function; it is not invoked
 * @return        launch configuration as passed to the kernel launch by
 *                for_each: x = number of blocks, y = threads per block
 */
template <typename InputIterator, typename Function>
int2 for_each_launch_parameters(InputIterator input, size_t length, Function func)
{
    const int2 params_64 = launch_parameters(for_each_kernel<InputIterator, Function, uint64>, length);
    const int2 params_32 = launch_parameters(for_each_kernel<InputIterator, Function, uint32>, length);

    // upper bound on the kernel loop index, computed entirely in 64 bits so
    // that neither the product nor the sum can wrap
    const uint64 index_bound = uint64(length) + uint64(params_32.x) * uint64(params_32.y);

    // the shift must be done on a 64-bit value: (1 << 31) overflows int
    const uint64 limit_32 = uint64(1) << 31;

    return (index_bound >= limit_32) ? params_64 : params_32;
}
113 
114 } // namespace lift
uint64_t uint64
Definition: types.h:45
__global__ void for_each_kernel(InputIterator input, size_t length, Function func)
Definition: for_each.h:40
int2 for_each_launch_parameters(InputIterator input, size_t length, Function func)
Definition: for_each.h:101
int2 launch_parameters(T kernel, size_t elements, int dynamic_smem_size=0)
void for_each(InputIterator input, size_t length, Function func, int2 launch_params={0, 0})
Definition: for_each.h:53