// -*- C++ -*-

// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 3, or (at your option) any later
// version.

// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file parallel/partial_sum.h
 *  @brief Parallel implementation of std::partial_sum(), i.e. prefix
 *  sums.
 *  This file is a GNU parallel extension to the Standard C++ Library.
 */

// Written by Johannes Singler.

#ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
#define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1

#include <omp.h>
#include <new>
#include <bits/stl_algobase.h>
#include <parallel/parallel.h>
#include <parallel/numericfwd.h>

namespace __gnu_parallel
{
  // Problem: there is no neutral (0-)element given, so an explicit start
  // value has to be passed to the base case routine.

  /** @brief Base case prefix sum routine.
   *  @param begin Begin iterator of input sequence.
   *  @param end End iterator of input sequence.
   *  @param result Begin iterator of output sequence.
   *  @param bin_op Associative binary function.
   *  @param value Start value. Must be passed since the neutral
   *  element is unknown in general.
   *  @return End iterator of output sequence. */
  template<typename InputIterator,
           typename OutputIterator,
           typename BinaryOperation>
    OutputIterator
    parallel_partial_sum_basecase(InputIterator begin, InputIterator end,
                                  OutputIterator result, BinaryOperation bin_op,
                                  typename std::iterator_traits
                                  <InputIterator>::value_type value)
    {
      if (begin == end)
        return result;

      while (begin != end)
        {
          value = bin_op(value, *begin);
          *result = value;
          ++result;
          ++begin;
        }
      return result;
    }
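
  // Illustrative use of the base case (a sketch only, not part of the
  // library interface; the names below are made up for the example):
  //
  //   int in[] = { 1, 2, 3, 4 };
  //   int out[4];
  //   // With std::plus and start value 0, out becomes { 1, 3, 6, 10 }.
  //   __gnu_parallel::parallel_partial_sum_basecase(in, in + 4, out,
  //                                                 std::plus<int>(), 0);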

  /** @brief Parallel partial sum implementation, two-phase approach,
   *  no recursion.
   *  @param begin Begin iterator of input sequence.
   *  @param end End iterator of input sequence.
   *  @param result Begin iterator of output sequence.
   *  @param bin_op Associative binary function.
   *  @param n Length of sequence.
   *  @return End iterator of output sequence.
   */
  template<typename InputIterator,
           typename OutputIterator,
           typename BinaryOperation>
    OutputIterator
    parallel_partial_sum_linear(InputIterator begin, InputIterator end,
                                OutputIterator result, BinaryOperation bin_op,
                                typename std::iterator_traits
                                <InputIterator>::difference_type n)
    {
      typedef std::iterator_traits<InputIterator> traits_type;
      typedef typename traits_type::value_type value_type;
      typedef typename traits_type::difference_type difference_type;

      if (begin == end)
        return result;

      thread_index_t num_threads =
        std::min<difference_type>(get_max_threads(), n - 1);

      if (num_threads < 2)
        {
          *result = *begin;
          return parallel_partial_sum_basecase(
            begin + 1, end, result + 1, bin_op, *begin);
        }
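
      // The parallel part below makes two passes over num_threads + 1
      // chunks of the input.  First pass: thread 0 computes the prefix
      // sum of chunk 0 directly into the output, every other thread i
      // only accumulates the total of chunk i; each thread records the
      // total of its chunk in sums[i].  A single thread then runs a
      // prefix sum over these per-chunk totals.  Second pass: thread i
      // writes out chunk i + 1, starting from sums[i], which by then
      // holds the combined total of chunks 0 .. i.
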
      difference_type* borders;
      value_type* sums;

      const _Settings& __s = _Settings::get();

#     pragma omp parallel num_threads(num_threads)
      {
#       pragma omp single
        {
          num_threads = omp_get_num_threads();

          // num_threads + 1 chunks need num_threads + 2 border indices.
          borders = new difference_type[num_threads + 2];

          if (__s.partial_sum_dilation == 1.0f)
            equally_split(n, num_threads + 1, borders);
          else
            {
              // Dilated split: chunk 0, which is summed and written in
              // one go during the first pass, gets roughly
              // partial_sum_dilation times the length of each of the
              // remaining num_threads chunks.
              difference_type chunk_length =
                ((double)n
                 / ((double)num_threads + __s.partial_sum_dilation)),
                borderstart = n - num_threads * chunk_length;
              borders[0] = 0;
              for (int i = 1; i < (num_threads + 1); ++i)
                {
                  borders[i] = borderstart;
                  borderstart += chunk_length;
                }
              borders[num_threads + 1] = n;
            }

          // Raw storage for the per-chunk totals; the elements are
          // constructed via placement new in the passes below.
          sums = static_cast<value_type*>(::operator new(sizeof(value_type)
                                                         * num_threads));
          OutputIterator target_end;
        } //single

        thread_index_t iam = omp_get_thread_num();

        // First pass: thread 0 already writes the results for chunk 0,
        // all other threads only accumulate the total of chunk iam.
        if (iam == 0)
          {
            *result = *begin;
            parallel_partial_sum_basecase(begin + 1, begin + borders[1],
                                          result + 1, bin_op, *begin);
            ::new(&(sums[iam])) value_type(*(result + borders[1] - 1));
          }
        else
          {
            ::new(&(sums[iam]))
              value_type(std::accumulate(begin + borders[iam] + 1,
                                         begin + borders[iam + 1],
                                         *(begin + borders[iam]),
                                         bin_op,
                                         __gnu_parallel::sequential_tag()));
          }

#       pragma omp barrier

        // Prefix sum over the per-chunk totals, done by a single thread.
#       pragma omp single
        parallel_partial_sum_basecase(
          sums + 1, sums + num_threads, sums + 1, bin_op, sums[0]);

#       pragma omp barrier

        // Still same team.
        // Second pass: write out chunk iam + 1, offset by sums[iam].
        parallel_partial_sum_basecase(begin + borders[iam + 1],
                                      begin + borders[iam + 2],
                                      result + borders[iam + 1], bin_op,
                                      sums[iam]);
      } //parallel

      ::operator delete(sums);
      delete[] borders;

      return result + n;
    }
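
  // Worked example of the decomposition above (illustrative only): with
  // n == 9, two threads and the default equal split, borders == {0,3,6,9}.
  // First pass: thread 0 writes result[0..2] and sets sums[0] = result[2];
  // thread 1 sets sums[1] = in[3] + in[4] + in[5].
  // Prefix over the totals: sums[1] = bin_op(sums[0], sums[1]).
  // Second pass: thread 0 writes result[3..5] starting from sums[0],
  // thread 1 writes result[6..8] starting from sums[1].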

  /** @brief Parallel partial sum front-end.
   *  @param begin Begin iterator of input sequence.
   *  @param end End iterator of input sequence.
   *  @param result Begin iterator of output sequence.
   *  @param bin_op Associative binary function.
   *  @return End iterator of output sequence. */
  template<typename InputIterator,
           typename OutputIterator,
           typename BinaryOperation>
    OutputIterator
    parallel_partial_sum(InputIterator begin, InputIterator end,
                         OutputIterator result, BinaryOperation bin_op)
    {
      _GLIBCXX_CALL(end - begin)

      typedef std::iterator_traits<InputIterator> traits_type;
      typedef typename traits_type::value_type value_type;
      typedef typename traits_type::difference_type difference_type;

      difference_type n = end - begin;

      switch (_Settings::get().partial_sum_algorithm)
        {
        case LINEAR:
          // Need an initial offset.
          return parallel_partial_sum_linear(begin, end, result, bin_op, n);
        default:
          // Partial_sum algorithm not implemented.
          _GLIBCXX_PARALLEL_ASSERT(0);
          return result + n;
        }
    }
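
  // Illustrative call of the front-end (a sketch; user code would normally
  // rely on std::partial_sum with parallel mode enabled, which may dispatch
  // here for random-access iterators):
  //
  //   std::vector<int> v(100, 1), out(100);
  //   __gnu_parallel::parallel_partial_sum(v.begin(), v.end(), out.begin(),
  //                                        std::plus<int>());
  //   // out now holds 1, 2, 3, ..., 100.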
}
#endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */