/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef CUDA_INTERVAL_LIB_H
#define CUDA_INTERVAL_LIB_H

#include "cuda_interval_rounded_arith.h"

// Interval template class and basic operations
// Interface inspired from the Boost Interval library (www.boost.org)
template <class T>
class interval_gpu
{
public:
    __device__ __host__ interval_gpu();
    __device__ __host__ interval_gpu(T const &v);
    __device__ __host__ interval_gpu(T const &l, T const &u);

    __device__ __host__ T const &lower() const;
    __device__ __host__ T const &upper() const;

    static __device__ __host__ interval_gpu empty();

private:
    T low;
    T up;
};

// Constructors
template <class T>
inline __device__ __host__ interval_gpu<T>::interval_gpu()
{
}

template <class T>
inline __device__ __host__ interval_gpu<T>::interval_gpu(T const &l, T const &u)
    : low(l), up(u)
{
}

template <class T>
inline __device__ __host__ interval_gpu<T>::interval_gpu(T const &v)
    : low(v), up(v)
{
}

template <class T>
inline __device__ __host__ T const &interval_gpu<T>::lower() const
{
    return low;
}

template <class T>
inline __device__ __host__ T const &interval_gpu<T>::upper() const
{
    return up;
}

template <class T>
inline __device__ __host__ interval_gpu<T> interval_gpu<T>::empty()
{
    rounded_arith<T> rnd;
    return interval_gpu<T>(rnd.nan(), rnd.nan());
}

template <class T>
inline __device__ __host__ bool empty(interval_gpu<T> x)
{
    // An empty interval is encoded with NaN bounds; the sum is NaN iff
    // either bound is NaN, and NaN != NaN detects it without branching.
    T hash = x.lower() + x.upper();
    return (hash != hash);
}

template <class T>
inline __device__ T width(interval_gpu<T> x)
{
    if (empty(x))
        return 0;

    rounded_arith<T> rnd;
    return rnd.sub_up(x.upper(), x.lower());
}

// Arithmetic operations

// Unary operators
template <class T>
inline __device__ interval_gpu<T> const &operator+(interval_gpu<T> const &x)
{
    return x;
}

template <class T>
inline __device__ interval_gpu<T> operator-(interval_gpu<T> const &x)
{
    return interval_gpu<T>(-x.upper(), -x.lower());
}

// Binary operators
template <class T>
inline __device__ interval_gpu<T> operator+(interval_gpu<T> const &x, interval_gpu<T> const &y)
{
    rounded_arith<T> rnd;
    return interval_gpu<T>(rnd.add_down(x.lower(), y.lower()), rnd.add_up(x.upper(), y.upper()));
}

template <class T>
inline __device__ interval_gpu<T> operator-(interval_gpu<T> const &x, interval_gpu<T> const &y)
{
    rounded_arith<T> rnd;
    return interval_gpu<T>(rnd.sub_down(x.lower(), y.upper()), rnd.sub_up(x.upper(), y.lower()));
}
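// ---------------------------------------------------------------------------
// Illustrative usage sketch (an editorial addition, not part of the original
// sample): shows how the constructors and the +/- operators above compose in
// a kernel. The CUDA_INTERVAL_LIB_DEMO guard and the kernel name are
// hypothetical; the directed-rounding primitives come from the included
// cuda_interval_rounded_arith.h, as used throughout this header.
#ifdef CUDA_INTERVAL_LIB_DEMO
__global__ void interval_add_sub_demo(interval_gpu<float> *out)
{
    interval_gpu<float> a(1.0f, 2.0f);  // the interval [1, 2]
    interval_gpu<float> b(0.5f, 0.75f); // the interval [0.5, 0.75]

    // Lower bounds are rounded down and upper bounds rounded up, so each
    // result is a guaranteed enclosure of the exact real-valued result.
    out[0] = a + b; // encloses [1.5, 2.75]
    out[1] = a - b; // [a.lower - b.upper, a.upper - b.lower] = [0.25, 1.5]
}
#endif // CUDA_INTERVAL_LIB_DEMO
// ---------------------------------------------------------------------------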
inline __device__ float min4(float a, float b, float c, float d)
{
    return fminf(fminf(a, b), fminf(c, d));
}

inline __device__ float max4(float a, float b, float c, float d)
{
    return fmaxf(fmaxf(a, b), fmaxf(c, d));
}

inline __device__ double min4(double a, double b, double c, double d)
{
    return fmin(fmin(a, b), fmin(c, d));
}

inline __device__ double max4(double a, double b, double c, double d)
{
    return fmax(fmax(a, b), fmax(c, d));
}

template <class T>
inline __device__ interval_gpu<T> operator*(interval_gpu<T> const &x, interval_gpu<T> const &y)
{
    // Textbook implementation: 14 flops, but no branch.
    rounded_arith<T> rnd;
    return interval_gpu<T>(min4(rnd.mul_down(x.lower(), y.lower()),
                                rnd.mul_down(x.lower(), y.upper()),
                                rnd.mul_down(x.upper(), y.lower()),
                                rnd.mul_down(x.upper(), y.upper())),
                           max4(rnd.mul_up(x.lower(), y.lower()),
                                rnd.mul_up(x.lower(), y.upper()),
                                rnd.mul_up(x.upper(), y.lower()),
                                rnd.mul_up(x.upper(), y.upper())));
}

// Center of an interval
// Typically used for bisection
template <class T>
inline __device__ T median(interval_gpu<T> const &x)
{
    rounded_arith<T> rnd;
    return rnd.median(x.lower(), x.upper());
}

// Intersection between two intervals (can be empty)
template <class T>
inline __device__ interval_gpu<T> intersect(interval_gpu<T> const &x, interval_gpu<T> const &y)
{
    rounded_arith<T> rnd;
    T const &l = rnd.max(x.lower(), y.lower());
    T const &u = rnd.min(x.upper(), y.upper());

    if (l <= u)
        return interval_gpu<T>(l, u);
    else
        return interval_gpu<T>::empty();
}
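// ---------------------------------------------------------------------------
// Illustrative sketch (editorial addition, same hypothetical demo guard):
// the branch-free multiplication above evaluates all four corner products
// with both rounding directions and keeps the extremes, so the result is a
// valid enclosure whatever the signs of the operands.
#ifdef CUDA_INTERVAL_LIB_DEMO
__global__ void interval_mul_demo(interval_gpu<double> *out)
{
    interval_gpu<double> x(-1.0, 2.0);
    interval_gpu<double> y(3.0, 4.0);

    // min4/max4 over the corner products: [-1, 2] * [3, 4] encloses [-4, 8].
    out[0] = x * y;

    // intersect() returns the overlap, or an empty (NaN-bound) interval
    // when the operands are disjoint: here [-4, 8] with [0, 100] gives [0, 8].
    out[1] = intersect(out[0], interval_gpu<double>(0.0, 100.0));
}
#endif // CUDA_INTERVAL_LIB_DEMO
// ---------------------------------------------------------------------------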
// Division by an interval which does not contain 0.
// GPU-optimized implementation assuming division is expensive
template <class T>
inline __device__ interval_gpu<T> div_non_zero(interval_gpu<T> const &x, interval_gpu<T> const &y)
{
    rounded_arith<T> rnd;
    typedef interval_gpu<T> I;
    T xl, yl, xu, yu;

    if (y.upper() < 0) {
        xl = x.upper();
        xu = x.lower();
    } else {
        xl = x.lower();
        xu = x.upper();
    }

    if (x.upper() < 0) {
        yl = y.lower();
        yu = y.upper();
    } else if (x.lower() < 0) {
        if (y.upper() < 0) {
            yl = y.upper();
            yu = y.upper();
        } else {
            yl = y.lower();
            yu = y.lower();
        }
    } else {
        yl = y.upper();
        yu = y.lower();
    }

    return I(rnd.div_down(xl, yl), rnd.div_up(xu, yu));
}

// Division by an interval of the form [0, yu], yu > 0
template <class T>
inline __device__ interval_gpu<T> div_positive(interval_gpu<T> const &x, T const &yu)
{
    // assert(yu > 0);
    if (x.lower() == 0 && x.upper() == 0)
        return x;

    rounded_arith<T> rnd;
    typedef interval_gpu<T> I;
    const T &xl = x.lower();
    const T &xu = x.upper();

    if (xu < 0)
        return I(rnd.neg_inf(), rnd.div_up(xu, yu));
    else if (xl < 0)
        return I(rnd.neg_inf(), rnd.pos_inf());
    else
        return I(rnd.div_down(xl, yu), rnd.pos_inf());
}

// Division by an interval of the form [yl, 0], yl < 0
template <class T>
inline __device__ interval_gpu<T> div_negative(interval_gpu<T> const &x, T const &yl)
{
    // assert(yl < 0);
    if (x.lower() == 0 && x.upper() == 0)
        return x;

    rounded_arith<T> rnd;
    typedef interval_gpu<T> I;
    const T &xl = x.lower();
    const T &xu = x.upper();

    if (xu < 0)
        return I(rnd.div_down(xu, yl), rnd.pos_inf());
    else if (xl < 0)
        return I(rnd.neg_inf(), rnd.pos_inf());
    else
        return I(rnd.neg_inf(), rnd.div_up(xl, yl));
}

// Division by an interval containing 0 in its interior: the true quotient set
// is a union of two half-unbounded intervals. part1 returns the first piece
// and sets b when a second piece exists; part2 returns that second piece.
template <class T>
inline __device__ interval_gpu<T> div_zero_part1(interval_gpu<T> const &x,
                                                 interval_gpu<T> const &y, bool &b)
{
    if (x.lower() == 0 && x.upper() == 0) {
        b = false;
        return x;
    }

    rounded_arith<T> rnd;
    typedef interval_gpu<T> I;
    const T &xl = x.lower();
    const T &xu = x.upper();
    const T &yl = y.lower();
    const T &yu = y.upper();

    if (xu < 0) {
        b = true;
        return I(rnd.neg_inf(), rnd.div_up(xu, yu));
    } else if (xl < 0) {
        b = false;
        return I(rnd.neg_inf(), rnd.pos_inf());
    } else {
        b = true;
        return I(rnd.neg_inf(), rnd.div_up(xl, yl));
    }
}

template <class T>
inline __device__ interval_gpu<T> div_zero_part2(interval_gpu<T> const &x, interval_gpu<T> const &y)
{
    rounded_arith<T> rnd;
    typedef interval_gpu<T> I;
    const T &xl = x.lower();
    const T &xu = x.upper();
    const T &yl = y.lower();
    const T &yu = y.upper();

    if (xu < 0)
        return I(rnd.div_down(xu, yl), rnd.pos_inf());
    else
        return I(rnd.div_down(xl, yu), rnd.pos_inf());
}

// Full division x / y, dispatching on the sign configuration of y.
// division_part2 must be called with the flag produced by division_part1.
template <class T>
inline __device__ interval_gpu<T> division_part1(interval_gpu<T> const &x,
                                                 interval_gpu<T> const &y, bool &b)
{
    b = false;

    if (y.lower() <= 0 && y.upper() >= 0) {
        if (y.lower() != 0) {
            if (y.upper() != 0)
                return div_zero_part1(x, y, b);
            else
                return div_negative(x, y.lower());
        } else {
            if (y.upper() != 0)
                return div_positive(x, y.upper());
            else
                return interval_gpu<T>::empty();
        }
    } else {
        return div_non_zero(x, y);
    }
}

template <class T>
inline __device__ interval_gpu<T> division_part2(interval_gpu<T> const &x,
                                                 interval_gpu<T> const &y, bool b = true)
{
    if (!b)
        return interval_gpu<T>::empty();

    return div_zero_part2(x, y);
}

template <class T>
inline __device__ interval_gpu<T> square(interval_gpu<T> const &x)
{
    typedef interval_gpu<T> I;
    rounded_arith<T> rnd;
    const T &xl = x.lower();
    const T &xu = x.upper();

    if (xl >= 0)
        return I(rnd.mul_down(xl, xl), rnd.mul_up(xu, xu));
    else if (xu <= 0)
        return I(rnd.mul_down(xu, xu), rnd.mul_up(xl, xl));
    else
        return I(static_cast<T>(0), rnd.max(rnd.mul_up(xl, xl), rnd.mul_up(xu, xu)));
}

#endif
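// ---------------------------------------------------------------------------
// Illustrative sketch (editorial addition, same hypothetical demo guard plus
// a nested guard so the block survives double inclusion): extended division
// through division_part1/division_part2. When the divisor straddles zero the
// quotient is a union of two half-unbounded intervals; part1 returns the
// first and sets the flag, part2 returns the second only if the flag is set.
#if defined(CUDA_INTERVAL_LIB_DEMO) && !defined(CUDA_INTERVAL_LIB_DEMO_DIV)
#define CUDA_INTERVAL_LIB_DEMO_DIV
__global__ void interval_div_demo(interval_gpu<float> *out)
{
    interval_gpu<float> num(1.0f, 2.0f);  // numerator [1, 2]
    interval_gpu<float> den(-1.0f, 1.0f); // denominator straddles 0

    bool two_pieces;
    out[0] = division_part1(num, den, two_pieces); // (-inf, -1], flag set
    out[1] = division_part2(num, den, two_pieces); // [1, +inf), or empty
}
#endif // CUDA_INTERVAL_LIB_DEMO && !CUDA_INTERVAL_LIB_DEMO_DIV
// ---------------------------------------------------------------------------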