| // Written in the D programming language. |
| |
| /** |
| This module is a port of a growing fragment of the $(D_PARAM numeric) |
| header in Alexander Stepanov's $(LINK2 http://sgi.com/tech/stl, |
| Standard Template Library), with a few additions. |
| |
| Macros: |
| Copyright: Copyright Andrei Alexandrescu 2008 - 2009. |
| License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). |
| Authors: $(HTTP erdani.org, Andrei Alexandrescu), |
| Don Clugston, Robert Jacques, Ilya Yaroshenko |
| Source: $(PHOBOSSRC std/_numeric.d) |
| */ |
| /* |
| Copyright Andrei Alexandrescu 2008 - 2009. |
| Distributed under the Boost Software License, Version 1.0. |
| (See accompanying file LICENSE_1_0.txt or copy at |
| http://www.boost.org/LICENSE_1_0.txt) |
| */ |
| module std.numeric; |
| |
| import std.complex; |
| import std.math; |
| import std.range.primitives; |
| import std.traits; |
| import std.typecons; |
| |
| version (unittest) |
| { |
| import std.stdio; |
| } |
/// Format flags for CustomFloat.
public enum CustomFloatFlags
{
    /// Adds a sign bit to allow for signed numbers.
    signed = 1,

    /**
     * Store values in normalized form by default. The actual precision of the
     * significand is extended by 1 bit by assuming an implicit leading bit of 1
     * instead of 0. i.e. $(D 1.nnnn) instead of $(D 0.nnnn).
     * True for all $(LINK2 https://en.wikipedia.org/wiki/IEEE_floating_point, IEEE754) types
     */
    storeNormalized = 2,

    /**
     * Stores the significand in $(LINK2 https://en.wikipedia.org/wiki/IEEE_754-1985#Denormalized_numbers,
     * IEEE754 denormalized) form when the exponent is 0. Required to express the value 0.
     */
    allowDenorm = 4,

    /**
     * Allows the storage of $(LINK2 https://en.wikipedia.org/wiki/IEEE_754-1985#Positive_and_negative_infinity,
     * IEEE754 _infinity) values.
     */
    infinity = 8,

    /// Allows the storage of $(LINK2 https://en.wikipedia.org/wiki/NaN, IEEE754 Not a Number) values.
    nan = 16,

    /**
     * If set, select an exponent bias such that max_exp = 1.
     * i.e. so that the maximum value is >= 1.0 and < 2.0.
     * Ignored if the exponent bias is manually specified.
     */
    probability = 32,

    /// If set, unsigned custom floats are assumed to be negative.
    negativeUnsigned = 64,

    /**If set, 0 is the only allowed $(LINK2 https://en.wikipedia.org/wiki/IEEE_754-1985#Denormalized_numbers,
     * IEEE754 denormalized) number.
     * Requires allowDenorm and storeNormalized.
     */
    allowDenormZeroOnly = 128 | allowDenorm | storeNormalized,

    /// Include _all of the $(LINK2 https://en.wikipedia.org/wiki/IEEE_floating_point, IEEE754) options.
    ieee = signed | storeNormalized | allowDenorm | infinity | nan ,

    /// Include none of the above options.
    none = 0
}
| |
// Maps a total bit count onto the (precision, exponentWidth, flags, bias)
// parameter tuple of the matching IEEE-like format. The 80-bit x87 extended
// format stores its leading significand bit explicitly rather than implicitly,
// so storeNormalized is toggled off for bits == 80.
private template CustomFloatParams(uint bits)
{
    enum CustomFloatFlags flags = CustomFloatFlags.ieee
        ^ ((bits == 80) ? CustomFloatFlags.storeNormalized : CustomFloatFlags.none);
    static if (bits == 8)  alias CustomFloatParams = CustomFloatParams!( 4,  3, flags);
    static if (bits == 16) alias CustomFloatParams = CustomFloatParams!(10,  5, flags);
    static if (bits == 32) alias CustomFloatParams = CustomFloatParams!(23,  8, flags);
    static if (bits == 64) alias CustomFloatParams = CustomFloatParams!(52, 11, flags);
    static if (bits == 80) alias CustomFloatParams = CustomFloatParams!(64, 15, flags);
}
| |
// Expands to the full parameter tuple accepted by the CustomFloat struct:
// (precision, exponentWidth, flags, bias). The computed fourth element is the
// exponent bias: the usual 2^(exponentWidth-1) form, reduced by one when an
// exponent value is reserved for infinity/NaN, and shifted so that
// max_exp == 1 when the probability flag is set (see CustomFloatFlags).
private template CustomFloatParams(uint precision, uint exponentWidth, CustomFloatFlags flags)
{
    import std.meta : AliasSeq;
    alias CustomFloatParams =
        AliasSeq!(
            precision,
            exponentWidth,
            flags,
            (1 << (exponentWidth - ((flags & flags.probability) == 0)))
            - ((flags & (flags.nan | flags.infinity)) != 0) - ((flags & flags.probability) != 0)
        ); // ((flags & CustomFloatFlags.probability) == 0)
}
| |
| /** |
| * Allows user code to define custom floating-point formats. These formats are |
| * for storage only; all operations on them are performed by first implicitly |
| * extracting them to $(D real) first. After the operation is completed the |
| * result can be stored in a custom floating-point value via assignment. |
| */ |
| template CustomFloat(uint bits) |
| if (bits == 8 || bits == 16 || bits == 32 || bits == 64 || bits == 80) |
| { |
| alias CustomFloat = CustomFloat!(CustomFloatParams!(bits)); |
| } |
| |
/// ditto
template CustomFloat(uint precision, uint exponentWidth, CustomFloatFlags flags = CustomFloatFlags.ieee)
// The total storage (sign bit + significand + exponent) must be a whole number of bytes.
if (((flags & flags.signed) + precision + exponentWidth) % 8 == 0 && precision + exponentWidth > 0)
{
    alias CustomFloat = CustomFloat!(CustomFloatParams!(precision, exponentWidth, flags));
}
| |
| /// |
| @safe unittest |
| { |
| import std.math : sin, cos; |
| |
| // Define a 16-bit floating point values |
| CustomFloat!16 x; // Using the number of bits |
| CustomFloat!(10, 5) y; // Using the precision and exponent width |
| CustomFloat!(10, 5,CustomFloatFlags.ieee) z; // Using the precision, exponent width and format flags |
| CustomFloat!(10, 5,CustomFloatFlags.ieee, 15) w; // Using the precision, exponent width, format flags and exponent offset bias |
| |
| // Use the 16-bit floats mostly like normal numbers |
| w = x*y - 1; |
| |
| // Functions calls require conversion |
| z = sin(+x) + cos(+y); // Use unary plus to concisely convert to a real |
| z = sin(x.get!float) + cos(y.get!float); // Or use get!T |
| z = sin(cast(float) x) + cos(cast(float) y); // Or use cast(T) to explicitly convert |
| |
| // Define a 8-bit custom float for storing probabilities |
| alias Probability = CustomFloat!(4, 4, CustomFloatFlags.ieee^CustomFloatFlags.probability^CustomFloatFlags.signed ); |
| auto p = Probability(0.5); |
| } |
| |
/// ditto
struct CustomFloat(uint precision,      // fraction bits (23 for float)
                   uint exponentWidth,  // exponent bits (8 for float) Exponent width
                   CustomFloatFlags flags,
                   uint bias)
if (((flags & flags.signed) + precision + exponentWidth) % 8 == 0 &&
    precision + exponentWidth > 0)
{
    import std.bitmanip : bitfields;
    import std.meta : staticIndexOf;
private:
    // get the correct unsigned bitfield type to support > 32 bits
    template uType(uint bits)
    {
        static if (bits <= size_t.sizeof*8) alias uType = size_t;
        else alias uType = ulong ;
    }

    // get the correct signed bitfield type to support > 32 bits
    template sType(uint bits)
    {
        static if (bits <= ptrdiff_t.sizeof*8-1) alias sType = ptrdiff_t;
        else alias sType = long;
    }

    alias T_sig = uType!precision;            // unsigned type wide enough for the significand
    alias T_exp = uType!exponentWidth;        // unsigned type wide enough for the biased exponent
    alias T_signed_exp = sType!exponentWidth; // signed type for unbiased exponents

    alias Flags = CustomFloatFlags;

    // Facilitate converting numeric types to custom float
    union ToBinary(F)
    if (is(typeof(CustomFloatParams!(F.sizeof*8))) || is(F == real))
    {
        F set;

        // If on Linux or Mac, where 80-bit reals are padded, ignore the
        // padding.
        import std.algorithm.comparison : min;
        CustomFloat!(CustomFloatParams!(min(F.sizeof*8, 80))) get;

        // Convert F to the correct binary type.
        static typeof(get) opCall(F value)
        {
            ToBinary r;
            r.set = value;
            return r.get;
        }
        alias get this;
    }

    // Perform IEEE rounding with round to nearest detection.
    // Shifts sig right by `shift` bits, rounding to nearest; an exact
    // halfway value is rounded to even (IEEE 754 round-to-nearest-even).
    void roundedShift(T,U)(ref T sig, U shift)
    {
        if (sig << (T.sizeof*8 - shift) == cast(T) 1uL << (T.sizeof*8 - 1))
        {
            // round to even
            sig >>= shift;
            sig += sig & 1;
        }
        else
        {
            sig >>= shift - 1;
            sig += sig & 1;
            // Perform standard rounding
            sig >>= 1;
        }
    }

    // Convert the current value to signed exponent, normalized form
    void toNormalized(T,U)(ref T sig, ref U exp)
    {
        sig = significand;
        auto shift = (T.sizeof*8) - precision;
        exp = exponent;
        static if (flags&(Flags.infinity|Flags.nan))
        {
            // Handle inf or nan
            if (exp == exponent_max)
            {
                exp = exp.max;
                sig <<= shift;
                static if (flags&Flags.storeNormalized)
                {
                    // Save inf/nan in denormalized format
                    sig >>= 1;
                    sig += cast(T) 1uL << (T.sizeof*8 - 1);
                }
                return;
            }
        }
        if ((~flags&Flags.storeNormalized) ||
            // Convert denormalized form to normalized form
            ((flags&Flags.allowDenorm) && exp == 0))
        {
            if (sig > 0)
            {
                import core.bitop : bsr;
                // Shift the most significant set bit up to the leading position.
                auto shift2 = precision - bsr(sig);
                exp -= shift2-1;
                shift += shift2;
            }
            else // value = 0.0
            {
                exp = exp.min;
                return;
            }
        }
        sig <<= shift;
        exp -= bias;
    }

    // Set the current value from signed exponent, normalized form
    void fromNormalized(T,U)(ref T sig, ref U exp)
    {
        auto shift = (T.sizeof*8) - precision;
        if (exp == exp.max)
        {
            // infinity or nan
            exp = exponent_max;
            static if (flags & Flags.storeNormalized)
                sig <<= 1;

            // convert back to normalized form
            static if (~flags & Flags.infinity)
                // No infinity support?
                assert(sig != 0, "Infinity floating point value assigned to a "
                       ~ typeof(this).stringof ~ " (no infinity support).");

            static if (~flags & Flags.nan) // No NaN support?
                assert(sig == 0, "NaN floating point value assigned to a " ~
                       typeof(this).stringof ~ " (no nan support).");
            sig >>= shift;
            return;
        }
        if (exp == exp.min) // 0.0
        {
            exp = 0;
            sig = 0;
            return;
        }

        exp += bias;
        if (exp <= 0)
        {
            static if ((flags&Flags.allowDenorm) ||
                       // Convert from normalized form to denormalized
                       (~flags&Flags.storeNormalized))
            {
                shift += -exp;
                roundedShift(sig,1);
                sig += cast(T) 1uL << (T.sizeof*8 - 1);
                // Add the leading 1
                exp = 0;
            }
            else
                assert((flags&Flags.storeNormalized) && exp == 0,
                       "Underflow occured assigning to a " ~
                       typeof(this).stringof ~ " (no denormal support).");
        }
        else
        {
            static if (~flags&Flags.storeNormalized)
            {
                // Convert from normalized form to denormalized
                roundedShift(sig,1);
                sig += cast(T) 1uL << (T.sizeof*8 - 1);
                // Add the leading 1
            }
        }

        if (shift > 0)
            roundedShift(sig,shift);
        if (sig > significand_max)
        {
            // handle significand overflow (should only be 1 bit)
            static if (~flags&Flags.storeNormalized)
            {
                sig >>= 1;
            }
            else
                sig &= significand_max;
            exp++;
        }
        static if ((flags&Flags.allowDenormZeroOnly)==Flags.allowDenormZeroOnly)
        {
            // disallow non-zero denormals
            if (exp == 0)
            {
                sig <<= 1;
                if (sig > significand_max && (sig&significand_max) > 0)
                    // Check and round to even
                    exp++;
                sig = 0;
            }
        }

        if (exp >= exponent_max)
        {
            static if (flags&(Flags.infinity|Flags.nan))
            {
                sig = 0;
                exp = exponent_max;
                static if (~flags&(Flags.infinity))
                    assert(0, "Overflow occured assigning to a " ~
                           typeof(this).stringof ~ " (no infinity support).");
            }
            else
                assert(exp == exponent_max, "Overflow occured assigning to a "
                       ~ typeof(this).stringof ~ " (no infinity support).");
        }
    }

public:
    static if (precision == 64) // CustomFloat!80 support hack
    {
        // The 64-bit significand is a plain field; significand_max is defined
        // manually since it is not produced by a bitfields mixin here.
        ulong significand;
        enum ulong significand_max = ulong.max;
        mixin(bitfields!(
            T_exp , "exponent", exponentWidth,
            bool , "sign" , flags & flags.signed ));
    }
    else
    {
        // std.bitmanip.bitfields also generates the significand_max and
        // exponent_max constants used throughout this struct.
        mixin(bitfields!(
            T_sig, "significand", precision,
            T_exp, "exponent" , exponentWidth,
            bool , "sign" , flags & flags.signed ));
    }

    /// Returns: infinity value
    static if (flags & Flags.infinity)
        static @property CustomFloat infinity()
        {
            CustomFloat value;
            static if (flags & Flags.signed)
                value.sign = 0;
            value.significand = 0;
            value.exponent = exponent_max;
            return value;
        }

    /// Returns: NaN value
    static if (flags & Flags.nan)
        static @property CustomFloat nan()
        {
            CustomFloat value;
            static if (flags & Flags.signed)
                value.sign = 0;
            // Quiet-NaN pattern: most significant fraction bit set.
            value.significand = cast(typeof(significand_max)) 1L << (precision-1);
            value.exponent = exponent_max;
            return value;
        }

    /// Returns: number of decimal digits of precision
    static @property size_t dig()
    {
        auto shiftcnt = precision - ((flags&Flags.storeNormalized) != 0);
        // A shift by 64 on a 64-bit operand would be undefined; use 0 instead.
        immutable x = (shiftcnt == 64) ? 0 : 1uL << shiftcnt;
        return cast(size_t) log10(x);
    }

    /// Returns: smallest increment to the value 1
    static @property CustomFloat epsilon()
    {
        CustomFloat value;
        static if (flags & Flags.signed)
            value.sign = 0;
        T_signed_exp exp = -precision;
        T_sig sig = 0;

        value.fromNormalized(sig,exp);
        if (exp == 0 && sig == 0) // underflowed to zero
        {
            static if ((flags&Flags.allowDenorm) ||
                       (~flags&Flags.storeNormalized))
                sig = 1;
            else
                // Fixed: was `cast(T)`, a symbol not in scope here, which made
                // this branch fail to compile for formats without denormal
                // support. T_sig is the significand's storage type.
                sig = cast(T_sig) 1uL << (precision - 1);
        }
        value.exponent = cast(value.T_exp) exp;
        value.significand = cast(value.T_sig) sig;
        return value;
    }

    /// the number of bits in mantissa
    enum mant_dig = precision + ((flags&Flags.storeNormalized) != 0);

    /// Returns: maximum int value such that 10<sup>max_10_exp</sup> is representable
    static @property int max_10_exp(){ return cast(int) log10( +max ); }

    /// maximum int value such that 2<sup>max_exp-1</sup> is representable
    enum max_exp = exponent_max-bias+((~flags&(Flags.infinity|flags.nan))!=0);

    /// Returns: minimum int value such that 10<sup>min_10_exp</sup> is representable
    static @property int min_10_exp(){ return cast(int) log10( +min_normal ); }

    /// minimum int value such that 2<sup>min_exp-1</sup> is representable as a normalized value
    enum min_exp = cast(T_signed_exp)-bias +1+ ((flags&Flags.allowDenorm)!=0);

    /// Returns: largest representable value that's not infinity
    static @property CustomFloat max()
    {
        CustomFloat value;
        static if (flags & Flags.signed)
            value.sign = 0;
        value.exponent = exponent_max - ((flags&(flags.infinity|flags.nan)) != 0);
        value.significand = significand_max;
        return value;
    }

    /// Returns: smallest representable normalized value that's not 0
    static @property CustomFloat min_normal()
    {
        CustomFloat value;
        static if (flags & Flags.signed)
            value.sign = 0;
        value.exponent = 1;
        static if (flags&Flags.storeNormalized)
            value.significand = 0;
        else
            value.significand = cast(T_sig) 1uL << (precision - 1);
        return value;
    }

    /// Returns: real part
    @property CustomFloat re() { return this; }

    /// Returns: imaginary part
    static @property CustomFloat im() { return CustomFloat(0.0f); }

    /// Initialize from any $(D real) compatible type.
    this(F)(F input) if (__traits(compiles, cast(real) input ))
    {
        this = input;
    }

    /// Self assignment
    void opAssign(F:CustomFloat)(F input)
    {
        static if (flags & Flags.signed)
            sign = input.sign;
        exponent = input.exponent;
        significand = input.significand;
    }

    /// Assigns from any $(D real) compatible type.
    void opAssign(F)(F input)
    if (__traits(compiles, cast(real) input))
    {
        import std.conv : text;

        static if (staticIndexOf!(Unqual!F, float, double, real) >= 0)
            auto value = ToBinary!(Unqual!F)(input);
        else
            auto value = ToBinary!(real )(input);

        // Assign the sign bit
        static if (~flags & Flags.signed)
            assert((!value.sign) ^ ((flags&flags.negativeUnsigned) > 0),
                   "Incorrectly signed floating point value assigned to a " ~
                   typeof(this).stringof ~ " (no sign support).");
        else
            sign = value.sign;

        CommonType!(T_signed_exp ,value.T_signed_exp) exp = value.exponent;
        CommonType!(T_sig, value.T_sig ) sig = value.significand;

        value.toNormalized(sig,exp);
        fromNormalized(sig,exp);

        assert(exp <= exponent_max, text(typeof(this).stringof ~
            " exponent too large: " ,exp," > ",exponent_max, "\t",input,"\t",sig));
        assert(sig <= significand_max, text(typeof(this).stringof ~
            " significand too large: ",sig," > ",significand_max,
            "\t",input,"\t",exp," ",exponent_max));
        exponent = cast(T_exp) exp;
        significand = cast(T_sig) sig;
    }

    /// Fetches the stored value either as a $(D float), $(D double) or $(D real).
    @property F get(F)()
    if (staticIndexOf!(Unqual!F, float, double, real) >= 0)
    {
        import std.conv : text;

        ToBinary!F result;

        static if (flags&Flags.signed)
            result.sign = sign;
        else
            result.sign = (flags&flags.negativeUnsigned) > 0;

        CommonType!(T_signed_exp ,result.get.T_signed_exp ) exp = exponent; // Assign the exponent and fraction
        CommonType!(T_sig, result.get.T_sig ) sig = significand;

        toNormalized(sig,exp);
        result.fromNormalized(sig,exp);
        assert(exp <= result.exponent_max, text("get exponent too large: " ,exp," > ",result.exponent_max) );
        assert(sig <= result.significand_max, text("get significand too large: ",sig," > ",result.significand_max) );
        result.exponent = cast(result.get.T_exp) exp;
        result.significand = cast(result.get.T_sig) sig;
        return result.set;
    }

    ///ditto
    T opCast(T)() if (__traits(compiles, get!T )) { return get!T; }

    /// Convert the CustomFloat to a real and perform the relevant operator on the result
    real opUnary(string op)()
    if (__traits(compiles, mixin(op~`(get!real)`)) || op=="++" || op=="--")
    {
        static if (op=="++" || op=="--")
        {
            auto result = get!real;
            this = mixin(op~`result`);
            return result;
        }
        else
            return mixin(op~`get!real`);
    }

    /// ditto
    real opBinary(string op,T)(T b)
    if (__traits(compiles, mixin(`get!real`~op~`b`)))
    {
        return mixin(`get!real`~op~`b`);
    }

    /// ditto
    // NOTE(review): the negated constraint below references `b`, which is not
    // a parameter of this overload, so that __traits(compiles) is always
    // false and the clause is always satisfied — verify the intent (likely
    // meant to exclude cases already handled by opBinary).
    real opBinaryRight(string op,T)(T a)
    if ( __traits(compiles, mixin(`a`~op~`get!real`)) &&
        !__traits(compiles, mixin(`get!real`~op~`b`)))
    {
        return mixin(`a`~op~`get!real`);
    }

    /// ditto
    int opCmp(T)(auto ref T b)
    if (__traits(compiles, cast(real) b))
    {
        auto x = get!real;
        auto y = cast(real) b;
        // Yields -1, 0, or 1; when either operand is NaN both comparisons are
        // false, so NaN compares as equal (0).
        return (x >= y)-(x <= y);
    }

    /// ditto
    void opOpAssign(string op, T)(auto ref T b)
    if (__traits(compiles, mixin(`get!real`~op~`cast(real) b`)))
    {
        return mixin(`this = this `~op~` cast(real) b`);
    }

    /// ditto
    template toString()
    {
        import std.format : FormatSpec, formatValue;
        // Needs to be a template because of DMD @@BUG@@ 13737.
        void toString()(scope void delegate(const(char)[]) sink, FormatSpec!char fmt)
        {
            sink.formatValue(get!real, fmt);
        }
    }
}
| |
// Exercise arithmetic (-=, *=, /=, assignment, ^^=) through several
// CustomFloat configurations, checking exact round-trips for values that are
// representable in every tested format.
@safe unittest
{
    import std.meta;
    alias FPTypes =
        AliasSeq!(
            CustomFloat!(5, 10),
            CustomFloat!(5, 11, CustomFloatFlags.ieee ^ CustomFloatFlags.signed),
            CustomFloat!(1, 15, CustomFloatFlags.ieee ^ CustomFloatFlags.signed),
            CustomFloat!(4, 3, CustomFloatFlags.ieee | CustomFloatFlags.probability ^ CustomFloatFlags.signed)
        );

    foreach (F; FPTypes)
    {
        auto x = F(0.125);
        assert(x.get!float == 0.125F);
        assert(x.get!double == 0.125);

        x -= 0.0625;
        assert(x.get!float == 0.0625F);
        assert(x.get!double == 0.0625);

        x *= 2;
        assert(x.get!float == 0.125F);
        assert(x.get!double == 0.125);

        x /= 4;
        assert(x.get!float == 0.03125);
        assert(x.get!double == 0.03125);

        x = 0.5;
        x ^^= 4;
        assert(x.get!float == 1 / 16.0F);
        assert(x.get!double == 1 / 16.0);
    }
}
| |
// Verify formatting via std.conv.to!string, which goes through toString.
@system unittest
{
    // @system due to to!string(CustomFloat)
    import std.conv;
    CustomFloat!(5, 10) y = CustomFloat!(5, 10)(0.125);
    assert(y.to!string == "0.125");
}
| |
| /** |
| Defines the fastest type to use when storing temporaries of a |
| calculation intended to ultimately yield a result of type $(D F) |
| (where $(D F) must be one of $(D float), $(D double), or $(D |
| real)). When doing a multi-step computation, you may want to store |
| intermediate results as $(D FPTemporary!F). |
| |
| The necessity of $(D FPTemporary) stems from the optimized |
| floating-point operations and registers present in virtually all |
| processors. When adding numbers in the example above, the addition may |
| in fact be done in $(D real) precision internally. In that case, |
| storing the intermediate $(D result) in $(D double format) is not only |
| less precise, it is also (surprisingly) slower, because a conversion |
| from $(D real) to $(D double) is performed every pass through the |
| loop. This being a lose-lose situation, $(D FPTemporary!F) has been |
| defined as the $(I fastest) type to use for calculations at precision |
| $(D F). There is no need to define a type for the $(I most accurate) |
| calculations, as that is always $(D real). |
| |
| Finally, there is no guarantee that using $(D FPTemporary!F) will |
| always be fastest, as the speed of floating-point calculations depends |
| on very many factors. |
| */ |
| template FPTemporary(F) |
| if (isFloatingPoint!F) |
| { |
| version (X86) |
| alias FPTemporary = real; |
| else |
| alias FPTemporary = Unqual!F; |
| } |
| |
| /// |
| @safe unittest |
| { |
| import std.math : approxEqual; |
| |
| // Average numbers in an array |
| double avg(in double[] a) |
| { |
| if (a.length == 0) return 0; |
| FPTemporary!double result = 0; |
| foreach (e; a) result += e; |
| return result / a.length; |
| } |
| |
| auto a = [1.0, 2.0, 3.0]; |
| assert(approxEqual(avg(a), 2)); |
| } |
| |
| /** |
| Implements the $(HTTP tinyurl.com/2zb9yr, secant method) for finding a |
| root of the function $(D fun) starting from points $(D [xn_1, x_n]) |
| (ideally close to the root). $(D Num) may be $(D float), $(D double), |
| or $(D real). |
| */ |
| template secantMethod(alias fun) |
| { |
| import std.functional : unaryFun; |
| Num secantMethod(Num)(Num xn_1, Num xn) |
| { |
| auto fxn = unaryFun!(fun)(xn_1), d = xn_1 - xn; |
| typeof(fxn) fxn_1; |
| |
| xn = xn_1; |
| while (!approxEqual(d, 0) && isFinite(d)) |
| { |
| xn_1 = xn; |
| xn -= d; |
| fxn_1 = fxn; |
| fxn = unaryFun!(fun)(xn); |
| d *= -fxn / (fxn - fxn_1); |
| } |
| return xn; |
| } |
| } |
| |
| /// |
| @safe unittest |
| { |
| import std.math : approxEqual, cos; |
| |
| float f(float x) |
| { |
| return cos(x) - x*x*x; |
| } |
| auto x = secantMethod!(f)(0f, 1f); |
| assert(approxEqual(x, 0.865474)); |
| } |
| |
// Check secantMethod both with an alias to a nested function and with a
// delegate pointer, which instantiate the template differently.
@system unittest
{
    // @system because of __gshared stderr
    scope(failure) stderr.writeln("Failure testing secantMethod");
    float f(float x)
    {
        return cos(x) - x*x*x;
    }
    immutable x = secantMethod!(f)(0f, 1f);
    assert(approxEqual(x, 0.865474));
    auto d = &f;
    immutable y = secantMethod!(d)(0f, 1f);
    assert(approxEqual(y, 0.865474));
}
| |
| |
| /** |
| * Return true if a and b have opposite sign. |
| */ |
| private bool oppositeSigns(T1, T2)(T1 a, T2 b) |
| { |
| return signbit(a) != signbit(b); |
| } |
| |
| public: |
| |
/** Find a real root of a real function f(x) via bracketing.
 *
 * Given a function `f` and a range `[a .. b]` such that `f(a)`
 * and `f(b)` have opposite signs or at least one of them equals ±0,
 * returns the value of `x` in
 * the range which is closest to a root of `f(x)`. If `f(x)`
 * has more than one root in the range, one will be chosen
 * arbitrarily. If `f(x)` returns NaN, NaN will be returned;
 * otherwise, this algorithm is guaranteed to succeed.
 *
 * Uses an algorithm based on TOMS748, which uses inverse cubic
 * interpolation whenever possible, otherwise reverting to parabolic
 * or secant interpolation. Compared to TOMS748, this implementation
 * improves worst-case performance by a factor of more than 100, and
 * typical performance by a factor of 2. For 80-bit reals, most
 * problems require 8 to 15 calls to `f(x)` to achieve full machine
 * precision. The worst-case performance (pathological cases) is
 * approximately twice the number of bits.
 *
 * References: "On Enclosing Simple Roots of Nonlinear Equations",
 * G. Alefeld, F.A. Potra, Yixun Shi, Mathematics of Computation 61,
 * pp733-744 (1993). Fortran code available from $(HTTP
 * www.netlib.org,www.netlib.org) as algorithm TOMS748.
 *
 */
T findRoot(T, DF, DT)(scope DF f, in T a, in T b,
    scope DT tolerance) //= (T a, T b) => false)
if (
    isFloatingPoint!T &&
    is(typeof(tolerance(T.init, T.init)) : bool) &&
    is(typeof(f(T.init)) == R, R) && isFloatingPoint!R
    )
{
    // Short-circuit when an endpoint is already an exact root.
    immutable fLeft = f(a);
    if (fLeft == 0)
        return a;
    immutable fRight = f(b);
    if (fRight == 0)
        return b;

    immutable bracket = findRoot(f, a, b, fLeft, fRight, tolerance);

    // Prefer the bound whose function value has the smaller magnitude; the
    // negated comparison also selects the first bound when NaN is involved.
    if (!(fabs(bracket[2]) > fabs(bracket[3])))
        return bracket[0];
    return bracket[1];
}
| |
///ditto
T findRoot(T, DF)(scope DF f, in T a, in T b)
{
    // The default tolerance never terminates early, so the search runs to
    // full machine precision.
    return findRoot(f, a, b, (T a, T b) => false);
}
| |
/** Find root of a real function f(x) by bracketing, allowing the
 * termination condition to be specified.
 *
 * Params:
 *
 * f = Function to be analyzed
 *
 * ax = Left bound of initial range of `f` known to contain the
 * root.
 *
 * bx = Right bound of initial range of `f` known to contain the
 * root.
 *
 * fax = Value of $(D f(ax)).
 *
 * fbx = Value of $(D f(bx)). $(D fax) and $(D fbx) should have opposite signs.
 * ($(D f(ax)) and $(D f(bx)) are commonly known in advance.)
 *
 *
 * tolerance = Defines an early termination condition. Receives the
 * current upper and lower bounds on the root. The
 * delegate must return $(D true) when these bounds are
 * acceptable. If this function always returns $(D false),
 * full machine precision will be achieved.
 *
 * Returns:
 *
 * A tuple consisting of two ranges. The first two elements are the
 * range (in `x`) of the root, while the second pair of elements
 * are the corresponding function values at those points. If an exact
 * root was found, both of the first two elements will contain the
 * root, and the second pair of elements will be 0.
 */
Tuple!(T, T, R, R) findRoot(T, R, DF, DT)(scope DF f, in T ax, in T bx, in R fax, in R fbx,
    scope DT tolerance) // = (T a, T b) => false)
if (
    isFloatingPoint!T &&
    is(typeof(tolerance(T.init, T.init)) : bool) &&
    is(typeof(f(T.init)) == R) && isFloatingPoint!R
    )
in
{
    assert(!ax.isNaN() && !bx.isNaN(), "Limits must not be NaN");
    assert(signbit(fax) != signbit(fbx), "Parameters must bracket the root.");
}
body
{
    // Author: Don Clugston. This code is (heavily) modified from TOMS748
    // (www.netlib.org). The changes to improve the worst-case performance are
    // entirely original.

    T a, b, d; // [a .. b] is our current bracket. d is the third best guess.
    R fa, fb, fd; // Values of f at a, b, d.
    bool done = false; // Has a root been found?

    // Allow ax and bx to be provided in reverse order
    if (ax <= bx)
    {
        a = ax; fa = fax;
        b = bx; fb = fbx;
    }
    else
    {
        a = bx; fa = fbx;
        b = ax; fb = fax;
    }

    // Test the function at point c; update brackets accordingly
    void bracket(T c)
    {
        R fc = f(c);
        if (fc == 0 || fc.isNaN()) // Exact solution, or NaN
        {
            a = c;
            fa = fc;
            d = c;
            fd = fc;
            done = true;
            return;
        }

        // Determine new enclosing interval
        if (signbit(fa) != signbit(fc))
        {
            d = b;
            fd = fb;
            b = c;
            fb = fc;
        }
        else
        {
            d = a;
            fd = fa;
            a = c;
            fa = fc;
        }
    }

    /* Perform a secant interpolation. If the result would lie on a or b, or if
    a and b differ so wildly in magnitude that the result would be meaningless,
    perform a bisection instead.
    */
    static T secant_interpolate(T a, T b, R fa, R fb)
    {
        if (( ((a - b) == a) && b != 0) || (a != 0 && ((b - a) == b)))
        {
            // Catastrophic cancellation
            if (a == 0)
                a = copysign(T(0), b);
            else if (b == 0)
                b = copysign(T(0), a);
            else if (signbit(a) != signbit(b))
                return 0;
            // Bisect in binary (exponent) space rather than linearly.
            T c = ieeeMean(a, b);
            return c;
        }
        // avoid overflow
        if (b - a > T.max)
            return b / 2 + a / 2;
        if (fb - fa > R.max)
            return a - (b - a) / 2;
        T c = a - (fa / (fb - fa)) * (b - a);
        if (c == a || c == b)
            return (a + b) / 2;
        return c;
    }

    /* Uses 'numsteps' newton steps to approximate the zero in [a .. b] of the
    quadratic polynomial interpolating f(x) at a, b, and d.
    Returns:
    The approximate zero in [a .. b] of the quadratic polynomial.
    */
    T newtonQuadratic(int numsteps)
    {
        // Find the coefficients of the quadratic polynomial.
        immutable T a0 = fa;
        immutable T a1 = (fb - fa)/(b - a);
        immutable T a2 = ((fd - fb)/(d - b) - a1)/(d - a);

        // Determine the starting point of newton steps.
        T c = oppositeSigns(a2, fa) ? a : b;

        // start the safeguarded newton steps.
        foreach (int i; 0 .. numsteps)
        {
            immutable T pc = a0 + (a1 + a2 * (c - b))*(c - a);
            immutable T pdc = a1 + a2*((2 * c) - (a + b));
            if (pdc == 0)
                return a - a0 / a1;
            else
                c = c - pc / pdc;
        }
        return c;
    }

    // On the first iteration we take a secant step:
    if (fa == 0 || fa.isNaN())
    {
        done = true;
        b = a;
        fb = fa;
    }
    else if (fb == 0 || fb.isNaN())
    {
        done = true;
        a = b;
        fa = fb;
    }
    else
    {
        bracket(secant_interpolate(a, b, fa, fb));
    }

    // Starting with the second iteration, higher-order interpolation can
    // be used.
    int itnum = 1; // Iteration number
    int baditer = 1; // Num bisections to take if an iteration is bad.
    T c, e; // e is our fourth best guess
    R fe;

whileloop:
    while (!done && (b != nextUp(a)) && !tolerance(a, b))
    {
        T a0 = a, b0 = b; // record the brackets

        // Do two higher-order (cubic or parabolic) interpolation steps.
        foreach (int QQ; 0 .. 2)
        {
            // Cubic inverse interpolation requires that
            // all four function values fa, fb, fd, and fe are distinct;
            // otherwise use quadratic interpolation.
            bool distinct = (fa != fb) && (fa != fd) && (fa != fe)
                && (fb != fd) && (fb != fe) && (fd != fe);
            // The first time, cubic interpolation is impossible.
            if (itnum<2) distinct = false;
            bool ok = distinct;
            if (distinct)
            {
                // Cubic inverse interpolation of f(x) at a, b, d, and e
                immutable q11 = (d - e) * fd / (fe - fd);
                immutable q21 = (b - d) * fb / (fd - fb);
                immutable q31 = (a - b) * fa / (fb - fa);
                immutable d21 = (b - d) * fd / (fd - fb);
                immutable d31 = (a - b) * fb / (fb - fa);

                immutable q22 = (d21 - q11) * fb / (fe - fb);
                immutable q32 = (d31 - q21) * fa / (fd - fa);
                immutable d32 = (d31 - q21) * fd / (fd - fa);
                immutable q33 = (d32 - q22) * fa / (fe - fa);
                c = a + (q31 + q32 + q33);
                if (c.isNaN() || (c <= a) || (c >= b))
                {
                    // DAC: If the interpolation predicts a or b, it's
                    // probable that it's the actual root. Only allow this if
                    // we're already close to the root.
                    if (c == a && a - b != a)
                    {
                        c = nextUp(a);
                    }
                    else if (c == b && a - b != -b)
                    {
                        c = nextDown(b);
                    }
                    else
                    {
                        ok = false;
                    }
                }
            }
            if (!ok)
            {
                // DAC: Alefeld doesn't explain why the number of newton steps
                // should vary.
                c = newtonQuadratic(distinct ? 3 : 2);
                if (c.isNaN() || (c <= a) || (c >= b))
                {
                    // Failure, try a secant step:
                    c = secant_interpolate(a, b, fa, fb);
                }
            }
            ++itnum;
            e = d;
            fe = fd;
            bracket(c);
            if (done || ( b == nextUp(a)) || tolerance(a, b))
                break whileloop;
            if (itnum == 2)
                continue whileloop;
        }

        // Now we take a double-length secant step:
        T u;
        R fu;
        // Start from the endpoint with the smaller function magnitude.
        if (fabs(fa) < fabs(fb))
        {
            u = a;
            fu = fa;
        }
        else
        {
            u = b;
            fu = fb;
        }
        c = u - 2 * (fu / (fb - fa)) * (b - a);

        // DAC: If the secant predicts a value equal to an endpoint, it's
        // probably false.
        if (c == a || c == b || c.isNaN() || fabs(c - u) > (b - a) / 2)
        {
            if ((a-b) == a || (b-a) == b)
            {
                // The interval is so extreme that linear bisection is
                // meaningless; fall back to an exponent-space mean.
                if ((a>0 && b<0) || (a<0 && b>0))
                    c = 0;
                else
                {
                    if (a == 0)
                        c = ieeeMean(copysign(T(0), b), b);
                    else if (b == 0)
                        c = ieeeMean(copysign(T(0), a), a);
                    else
                        c = ieeeMean(a, b);
                }
            }
            else
            {
                c = a + (b - a) / 2;
            }
        }
        e = d;
        fe = fd;
        bracket(c);
        if (done || (b == nextUp(a)) || tolerance(a, b))
            break;

        // IMPROVE THE WORST-CASE PERFORMANCE
        // We must ensure that the bounds reduce by a factor of 2
        // in binary space! every iteration. If we haven't achieved this
        // yet, or if we don't yet know what the exponent is,
        // perform a binary chop.

        if ((a == 0 || b == 0 ||
            (fabs(a) >= T(0.5) * fabs(b) && fabs(b) >= T(0.5) * fabs(a)))
            && (b - a) < T(0.25) * (b0 - a0))
        {
            baditer = 1;
            continue;
        }

        // DAC: If this happens on consecutive iterations, we probably have a
        // pathological function. Perform a number of bisections equal to the
        // total number of consecutive bad iterations.

        if ((b - a) < T(0.25) * (b0 - a0))
            baditer = 1;
        foreach (int QQ; 0 .. baditer)
        {
            e = d;
            fe = fd;

            T w;
            if ((a>0 && b<0) || (a<0 && b>0))
                w = 0;
            else
            {
                T usea = a;
                T useb = b;
                if (a == 0)
                    usea = copysign(T(0), b);
                else if (b == 0)
                    useb = copysign(T(0), a);
                w = ieeeMean(usea, useb);
            }
            bracket(w);
        }
        ++baditer;
    }
    return Tuple!(T, T, R, R)(a, b, fa, fb);
}
| |
| ///ditto |
Tuple!(T, T, R, R) findRoot(T, R, DF)(scope DF f, in T ax, in T bx, in R fax, in R fbx)
{
    // Forward to the full overload with a predicate that never requests
    // early termination, so the bracket is refined to machine precision.
    return findRoot(f, ax, bx, fax, fbx, delegate bool(T lo, T hi) { return false; });
}
| |
| ///ditto |
T findRoot(T, R)(scope R delegate(T) f, in T a, in T b,
    scope bool delegate(T lo, T hi) tolerance = (T a, T b) => false)
{
    // Explicitly instantiate the generic overload for delegate arguments,
    // pinning the template parameters to the delegate types.
    alias Impl = findRoot!(T, R delegate(T), bool delegate(T lo, T hi));
    return Impl(f, a, b, tolerance);
}
| |
@safe nothrow unittest
{
    // Exercises findRoot on polynomial, transcendental, and pathological
    // functions. Most driver calls are commented out: they were used for
    // the historical call-count comparisons against the Alefeld paper and
    // are kept for reference.
    int numProblems = 0;
    int numCalls;

    // Runs findRoot on f over [x1, x2] and checks that the returned
    // bracket still straddles (or hits) the root.
    void testFindRoot(real delegate(real) @nogc @safe nothrow pure f , real x1, real x2) @nogc @safe nothrow pure
    {
        //numCalls=0;
        //++numProblems;
        assert(!x1.isNaN() && !x2.isNaN());
        assert(signbit(x1) != signbit(x2));
        auto result = findRoot(f, x1, x2, f(x1), f(x2),
          (real lo, real hi) { return false; });

        auto flo = f(result[0]);
        auto fhi = f(result[1]);
        if (flo != 0)
        {
            assert(oppositeSigns(flo, fhi));
        }
    }

    // Test functions
    real cubicfn(real x) @nogc @safe nothrow pure
    {
        //++numCalls;
        if (x>float.max)
            x = float.max;
        if (x<-double.max)
            x = -double.max;
        // This has a single real root at -59.286543284815
        return 0.386*x*x*x + 23*x*x + 15.7*x + 525.2;
    }
    // Test a function with more than one root.
    real multisine(real x) { ++numCalls; return sin(x); }
    //testFindRoot( &multisine, 6, 90);
    //testFindRoot(&cubicfn, -100, 100);
    //testFindRoot( &cubicfn, -double.max, real.max);


/* Tests from the paper:
 * "On Enclosing Simple Roots of Nonlinear Equations", G. Alefeld, F.A. Potra,
 *   Yixun Shi, Mathematics of Computation 61, pp733-744 (1993).
 */
    // Parameters common to many alefeld tests.
    int n;
    real ale_a, ale_b;

    int powercalls = 0;

    real power(real x)
    {
        ++powercalls;
        ++numCalls;
        return pow(x, n) + double.min_normal;
    }
    int [] power_nvals = [3, 5, 7, 9, 19, 25];
    // Alefeld paper states that pow(x,n) is a very poor case, where bisection
    // outperforms his method, and gives total numcalls =
    // 921 for bisection (2.4 calls per bit), 1830 for Alefeld (4.76/bit),
    // 2624 for brent (6.8/bit)
    // ... but that is for double, not real80.
    // This poor performance seems mainly due to catastrophic cancellation,
    // which is avoided here by the use of ieeeMean().
    // I get: 231 (0.48/bit).
    // IE this is 10X faster in Alefeld's worst case
    numProblems=0;
    foreach (k; power_nvals)
    {
        n = k;
        //testFindRoot(&power, -1, 10);
    }

    int powerProblems = numProblems;

    // Tests from Alefeld paper

    int [9] alefeldSums;
    real alefeld0(real x)
    {
        ++alefeldSums[0];
        ++numCalls;
        real q =  sin(x) - x/2;
        for (int i=1; i<20; ++i)
            q+=(2*i-5.0)*(2*i-5.0)/((x-i*i)*(x-i*i)*(x-i*i));
        return q;
    }
    real alefeld1(real x)
    {
        ++numCalls;
        ++alefeldSums[1];
        return ale_a*x + exp(ale_b * x);
    }
    real alefeld2(real x)
    {
        ++numCalls;
        ++alefeldSums[2];
        return pow(x, n) - ale_a;
    }
    real alefeld3(real x)
    {
        ++numCalls;
        ++alefeldSums[3];
        return (1.0 +pow(1.0L-n, 2))*x - pow(1.0L-n*x, 2);
    }
    real alefeld4(real x)
    {
        ++numCalls;
        ++alefeldSums[4];
        return x*x - pow(1-x, n);
    }
    real alefeld5(real x)
    {
        ++numCalls;
        ++alefeldSums[5];
        return (1+pow(1.0L-n, 4))*x - pow(1.0L-n*x, 4);
    }
    real alefeld6(real x)
    {
        ++numCalls;
        ++alefeldSums[6];
        return exp(-n*x)*(x-1.01L) + pow(x, n);
    }
    real alefeld7(real x)
    {
        ++numCalls;
        ++alefeldSums[7];
        return (n*x-1)/((n-1)*x);
    }

    numProblems=0;
    //testFindRoot(&alefeld0, PI_2, PI);
    for (n=1; n <= 10; ++n)
    {
        //testFindRoot(&alefeld0, n*n+1e-9L, (n+1)*(n+1)-1e-9L);
    }
    ale_a = -40; ale_b = -1;
    //testFindRoot(&alefeld1, -9, 31);
    ale_a = -100; ale_b = -2;
    //testFindRoot(&alefeld1, -9, 31);
    ale_a = -200; ale_b = -3;
    //testFindRoot(&alefeld1, -9, 31);
    int [] nvals_3 = [1, 2, 5, 10, 15, 20];
    int [] nvals_5 = [1, 2, 4, 5, 8, 15, 20];
    int [] nvals_6 = [1, 5, 10, 15, 20];
    int [] nvals_7 = [2, 5, 15, 20];

    for (int i=4; i<12; i+=2)
    {
       n = i;
       ale_a = 0.2;
       //testFindRoot(&alefeld2, 0, 5);
       ale_a=1;
       //testFindRoot(&alefeld2, 0.95, 4.05);
       //testFindRoot(&alefeld2, 0, 1.5);
    }
    foreach (i; nvals_3)
    {
        n=i;
        //testFindRoot(&alefeld3, 0, 1);
    }
    foreach (i; nvals_3)
    {
        n=i;
        //testFindRoot(&alefeld4, 0, 1);
    }
    foreach (i; nvals_5)
    {
        n=i;
        //testFindRoot(&alefeld5, 0, 1);
    }
    foreach (i; nvals_6)
    {
        n=i;
        //testFindRoot(&alefeld6, 0, 1);
    }
    foreach (i; nvals_7)
    {
        n=i;
        //testFindRoot(&alefeld7, 0.01L, 1);
    }
    // Step function: worst possible case for any bracketing method.
    real worstcase(real x)
    {
        ++numCalls;
        return x<0.3*real.max? -0.999e-3 : 1.0;
    }
    //testFindRoot(&worstcase, -real.max, real.max);

    // just check that the double + float cases compile
    //findRoot((double x){ return 0.0; }, -double.max, double.max);
    //findRoot((float x){ return 0.0f; }, -float.max, float.max);

/*
   int grandtotal=0;
   foreach (calls; alefeldSums)
   {
       grandtotal+=calls;
   }
   grandtotal-=2*numProblems;
   printf("\nALEFELD TOTAL = %d avg = %f (alefeld avg=19.3 for double)\n",
   grandtotal, (1.0*grandtotal)/numProblems);
   powercalls -= 2*powerProblems;
   printf("POWER TOTAL = %d avg = %f ", powercalls,
        (1.0*powercalls)/powerProblems);
*/
    //Issue 14231
    auto xp = findRoot((float x) => x, 0f, 1f);
    auto xn = findRoot((float x) => x, -1f, -0f);
}
| |
| //regression control |
@system unittest
{
    // @system due to the case in the 2nd line
    // Compile-only checks: findRoot must instantiate when the callable's
    // return type differs from the abscissa type (float/real/double mixes).
    static assert(__traits(compiles, findRoot((float x)=>cast(real) x, float.init, float.init)));
    static assert(__traits(compiles, findRoot!real((x)=>cast(double) x, real.init, real.init)));
    static assert(__traits(compiles, findRoot((real x)=>cast(double) x, real.init, real.init)));
}
| |
| /++ |
| Find a real minimum of a real function `f(x)` via bracketing. |
| Given a function `f` and a range `(ax .. bx)`, |
| returns the value of `x` in the range which is closest to a minimum of `f(x)`. |
| `f` is never evaluted at the endpoints of `ax` and `bx`. |
| If `f(x)` has more than one minimum in the range, one will be chosen arbitrarily. |
| If `f(x)` returns NaN or -Infinity, `(x, f(x), NaN)` will be returned; |
| otherwise, this algorithm is guaranteed to succeed. |
| |
| Params: |
| f = Function to be analyzed |
| ax = Left bound of initial range of f known to contain the minimum. |
| bx = Right bound of initial range of f known to contain the minimum. |
| relTolerance = Relative tolerance. |
| absTolerance = Absolute tolerance. |
| |
| Preconditions: |
| `ax` and `bx` shall be finite reals. $(BR) |
| $(D relTolerance) shall be normal positive real. $(BR) |
| $(D absTolerance) shall be normal positive real no less then $(D T.epsilon*2). |
| |
| Returns: |
| A tuple consisting of `x`, `y = f(x)` and `error = 3 * (absTolerance * fabs(x) + relTolerance)`. |
| |
| The method used is a combination of golden section search and |
| successive parabolic interpolation. Convergence is never much slower |
| than that for a Fibonacci search. |
| |
| References: |
| "Algorithms for Minimization without Derivatives", Richard Brent, Prentice-Hall, Inc. (1973) |
| |
| See_Also: $(LREF findRoot), $(REF isNormal, std,math) |
| +/ |
Tuple!(T, "x", Unqual!(ReturnType!DF), "y", T, "error")
findLocalMin(T, DF)(
        scope DF f,
        in T ax,
        in T bx,
        in T relTolerance = sqrt(T.epsilon),
        in T absTolerance = sqrt(T.epsilon),
        )
if (isFloatingPoint!T
    && __traits(compiles, {T _ = DF.init(T.init);}))
in
{
    assert(isFinite(ax), "ax is not finite");
    assert(isFinite(bx), "bx is not finite");
    assert(isNormal(relTolerance), "relTolerance is not normal floating point number");
    assert(isNormal(absTolerance), "absTolerance is not normal floating point number");
    // Fixed: these two messages previously named the wrong parameter /
    // contained a typo ("absTolerance is not positive" for the relTolerance
    // check, and "not greater then").
    assert(relTolerance >= 0, "relTolerance is not positive");
    assert(absTolerance >= T.epsilon*2, "absTolerance is smaller than `2*T.epsilon`");
}
out (result)
{
    assert(isFinite(result.x));
}
body
{
    alias R = Unqual!(CommonType!(ReturnType!DF, T));
    // c is the squared inverse of the golden ratio
    // (3 - sqrt(5))/2
    // Value obtained from Wolfram Alpha.
    enum T c = 0x0.61c8864680b583ea0c633f9fa31237p+0L;
    enum T cm1 = 0x0.9e3779b97f4a7c15f39cc0605cedc8p+0L;
    R tolerance;
    T a = ax > bx ? bx : ax;
    T b = ax > bx ? ax : bx;
    // sequence of declarations suitable for SIMD instructions
    T  v = a * cm1 + b * c;
    assert(isFinite(v));
    R fv = f(v);
    if (isNaN(fv) || fv == -T.infinity)
    {
        return typeof(return)(v, fv, T.init);
    }
    T  w = v;
    R fw = fv;
    T  x = v;
    R fx = fv;
    size_t i;
    for (R d = 0, e = 0;;)
    {
        i++;
        T m = (a + b) / 2;
        // This fix is not part of the original algorithm
        if (!isFinite(m)) // fix infinity loop. Issue can be reproduced in R.
        {
            m = a / 2 + b / 2;
            if (!isFinite(m)) // fast-math compiler switch is enabled
            {
                //SIMD instructions can be used by compiler, do not reduce declarations
                int a_exp = void;
                int b_exp = void;
                immutable an = frexp(a, a_exp);
                immutable bn = frexp(b, b_exp);
                immutable am = ldexp(an, a_exp-1);
                immutable bm = ldexp(bn, b_exp-1);
                m = am + bm;
                if (!isFinite(m)) // wrong input: constraints are disabled in release mode
                {
                    return typeof(return).init;
                }
            }
        }
        tolerance = absTolerance * fabs(x) + relTolerance;
        immutable t2 = tolerance * 2;
        // check stopping criterion: the bracket [a, b] has shrunk to
        // within twice the tolerance of the current best point x.
        if (!(fabs(x - m) > t2 - (b - a) / 2))
        {
            break;
        }
        R p = 0;
        R q = 0;
        R r = 0;
        // fit parabola through (v, fv), (w, fw), (x, fx)
        if (fabs(e) > tolerance)
        {
            immutable  xw =  x -  w;
            immutable fxw = fx - fw;
            immutable  xv =  x -  v;
            immutable fxv = fx - fv;
            immutable xwfxv = xw * fxv;
            immutable xvfxw = xv * fxw;
            p = xv * xvfxw - xw * xwfxv;
            q = (xvfxw - xwfxv) * 2;
            if (q > 0)
                p = -p;
            else
                q = -q;
            r = e;
            e = d;
        }
        T u;
        // a parabolic-interpolation step
        if (fabs(p) < fabs(q * r / 2) && p > q * (a - x) && p < q * (b - x))
        {
            d = p / q;
            u = x + d;
            // f must not be evaluated too close to a or b
            if (u - a < t2 || b - u < t2)
                d = x < m ? tolerance : -tolerance;
        }
        // a golden-section step
        else
        {
            e = (x < m ? b : a) - x;
            d = c * e;
        }
        // f must not be evaluated too close to x
        u = x + (fabs(d) >= tolerance ? d : d > 0 ? tolerance : -tolerance);
        immutable fu = f(u);
        if (isNaN(fu) || fu == -T.infinity)
        {
            return typeof(return)(u, fu, T.init);
        }
        // update a, b, v, w, and x
        if (fu <= fx)
        {
            // note: (u < x ? b : a) = x is a conditional lvalue assignment
            u < x ? b : a = x;
            v = w; fv = fw;
            w = x; fw = fx;
            x = u; fx = fu;
        }
        else
        {
            u < x ? a : b = u;
            if (fu <= fw || w == x)
            {
                v = w; fv = fw;
                w = u; fw = fu;
            }
            else if (fu <= fv || v == x || v == w)
            { // do not remove this braces
                v = u; fv = fu;
            }
        }
    }
    return typeof(return)(x, fx, tolerance * 3);
}
| |
| /// |
@safe unittest
{
    import std.math : approxEqual;

    // A smooth parabola with a single minimum at x = 4, y = 0.
    auto ret = findLocalMin((double x) => (x-4)^^2, -1e7, 1e7);
    assert(ret.x.approxEqual(4.0));
    assert(ret.y.approxEqual(0.0));
}
| |
@safe unittest
{
    import std.meta : AliasSeq;
    // Exercise findLocalMin for each floating-point type: smooth minima,
    // a non-differentiable minimum (fabs), NaN propagation, and minima
    // at/near a boundary (log near 0).
    foreach (T; AliasSeq!(double, float, real))
    {
        {
            auto ret = findLocalMin!T((T x) => (x-4)^^2, T.min_normal, 1e7);
            assert(ret.x.approxEqual(T(4)));
            assert(ret.y.approxEqual(T(0)));
        }
        {
            auto ret = findLocalMin!T((T x) => fabs(x-1), -T.max/4, T.max/4, T.min_normal, 2*T.epsilon);
            assert(approxEqual(ret.x, T(1)));
            assert(approxEqual(ret.y, T(0)));
            assert(ret.error <= 10 * T.epsilon);
        }
        {
            // NaN-returning function: result carries the NaN through.
            auto ret = findLocalMin!T((T x) => T.init, 0, 1, T.min_normal, 2*T.epsilon);
            assert(!ret.x.isNaN);
            assert(ret.y.isNaN);
            assert(ret.error.isNaN);
        }
        {
            auto ret = findLocalMin!T((T x) => log(x), 0, 1, T.min_normal, 2*T.epsilon);
            assert(ret.error < 3.00001 * ((2*T.epsilon)*fabs(ret.x)+ T.min_normal));
            assert(ret.x >= 0 && ret.x <= ret.error);
        }
        {
            auto ret = findLocalMin!T((T x) => log(x), 0, T.max, T.min_normal, 2*T.epsilon);
            assert(ret.y < -18);
            assert(ret.error < 5e-08);
            assert(ret.x >= 0 && ret.x <= ret.error);
        }
        {
            auto ret = findLocalMin!T((T x) => -fabs(x), -1, 1, T.min_normal, 2*T.epsilon);
            assert(ret.x.fabs.approxEqual(T(1)));
            assert(ret.y.fabs.approxEqual(T(1)));
            assert(ret.error.approxEqual(T(0)));
        }
    }
}
| |
| /** |
| Computes $(LINK2 https://en.wikipedia.org/wiki/Euclidean_distance, |
| Euclidean distance) between input ranges $(D a) and |
| $(D b). The two ranges must have the same length. The three-parameter |
| version stops computation as soon as the distance is greater than or |
| equal to $(D limit) (this is useful to save computation if a small |
| distance is sought). |
| */ |
CommonType!(ElementType!(Range1), ElementType!(Range2))
euclideanDistance(Range1, Range2)(Range1 a, Range2 b)
if (isInputRange!(Range1) && isInputRange!(Range2))
{
    // When both ranges know their length, verify equality once up front;
    // otherwise check at the end that b was exhausted along with a.
    enum bool bothHaveLength = hasLength!(Range1) && hasLength!(Range2);
    static if (bothHaveLength) assert(a.length == b.length);
    Unqual!(typeof(return)) sumOfSquares = 0;
    while (!a.empty)
    {
        immutable diff = a.front - b.front;
        sumOfSquares += diff * diff;
        a.popFront();
        b.popFront();
    }
    static if (!bothHaveLength) assert(b.empty);
    return sqrt(sumOfSquares);
}
| |
| /// Ditto |
CommonType!(ElementType!(Range1), ElementType!(Range2))
euclideanDistance(Range1, Range2, F)(Range1 a, Range2 b, F limit)
if (isInputRange!(Range1) && isInputRange!(Range2))
{
    // Square the limit once so the loop can compare against the running
    // sum of squares without taking a root per iteration.
    limit *= limit;
    enum bool bothHaveLength = hasLength!(Range1) && hasLength!(Range2);
    static if (bothHaveLength) assert(a.length == b.length);
    Unqual!(typeof(return)) sumOfSquares = 0;
    while (true)
    {
        if (a.empty)
        {
            // The exhaustion check only applies when the loop ran to
            // completion, not when it stopped early at the limit.
            static if (!bothHaveLength) assert(b.empty);
            break;
        }
        immutable diff = a.front - b.front;
        sumOfSquares += diff * diff;
        if (sumOfSquares >= limit) break;
        a.popFront();
        b.popFront();
    }
    return sqrt(sumOfSquares);
}
| |
@safe unittest
{
    import std.meta : AliasSeq;
    // Check both overloads across mutable/const/immutable element types;
    // (1,2)-(4,6) is a 3-4-5 triangle, and a limit of 2 stops after the
    // first term (3^2 = 9 >= 4), yielding sqrt(9) = 3.
    foreach (T; AliasSeq!(double, const double, immutable double))
    {
        T[] a = [ 1.0, 2.0, ];
        T[] b = [ 4.0, 6.0, ];
        assert(euclideanDistance(a, b) == 5);
        assert(euclideanDistance(a, b, 5) == 5);
        assert(euclideanDistance(a, b, 4) == 5);
        assert(euclideanDistance(a, b, 2) == 3);
    }
}
| |
| /** |
| Computes the $(LINK2 https://en.wikipedia.org/wiki/Dot_product, |
| dot product) of input ranges $(D a) and $(D |
| b). The two ranges must have the same length. If both ranges define |
| length, the check is done once; otherwise, it is done at each |
| iteration. |
| */ |
CommonType!(ElementType!(Range1), ElementType!(Range2))
dotProduct(Range1, Range2)(Range1 a, Range2 b)
if (isInputRange!(Range1) && isInputRange!(Range2) &&
    !(isArray!(Range1) && isArray!(Range2)))
{
    // Two arrays take the unrolled overload instead; this generic version
    // walks both ranges in lockstep, accumulating elementwise products.
    enum bool bothHaveLength = hasLength!(Range1) && hasLength!(Range2);
    static if (bothHaveLength) assert(a.length == b.length);
    Unqual!(typeof(return)) sum = 0;
    while (!a.empty)
    {
        sum += a.front * b.front;
        a.popFront();
        b.popFront();
    }
    static if (!bothHaveLength) assert(b.empty);
    return sum;
}
| |
| /// Ditto |
CommonType!(F1, F2)
dotProduct(F1, F2)(in F1[] avector, in F2[] bvector)
{
    // Optimized array overload: pointer-walks both arrays in manually
    // unrolled blocks of 16, then 4, then one at a time for the tail.
    immutable n = avector.length;
    assert(n == bvector.length);
    auto avec = avector.ptr, bvec = bvector.ptr;
    // Two independent accumulators break the dependency chain, enabling
    // instruction-level parallelism; they are merged before the tail loop.
    Unqual!(typeof(return)) sum0 = 0, sum1 = 0;

    const all_endp = avec + n;
    const smallblock_endp = avec + (n & ~3); // end of the 4-wide blocks
    const bigblock_endp = avec + (n & ~15); // end of the 16-wide blocks

    // Main loop: 16 elements per iteration.
    for (; avec != bigblock_endp; avec += 16, bvec += 16)
    {
        sum0 += avec[0] * bvec[0];
        sum1 += avec[1] * bvec[1];
        sum0 += avec[2] * bvec[2];
        sum1 += avec[3] * bvec[3];
        sum0 += avec[4] * bvec[4];
        sum1 += avec[5] * bvec[5];
        sum0 += avec[6] * bvec[6];
        sum1 += avec[7] * bvec[7];
        sum0 += avec[8] * bvec[8];
        sum1 += avec[9] * bvec[9];
        sum0 += avec[10] * bvec[10];
        sum1 += avec[11] * bvec[11];
        sum0 += avec[12] * bvec[12];
        sum1 += avec[13] * bvec[13];
        sum0 += avec[14] * bvec[14];
        sum1 += avec[15] * bvec[15];
    }

    // Middle loop: 4 elements per iteration for the remaining 4-multiples.
    for (; avec != smallblock_endp; avec += 4, bvec += 4)
    {
        sum0 += avec[0] * bvec[0];
        sum1 += avec[1] * bvec[1];
        sum0 += avec[2] * bvec[2];
        sum1 += avec[3] * bvec[3];
    }

    sum0 += sum1;

    /* Do trailing portion in naive loop. */
    while (avec != all_endp)
    {
        sum0 += *avec * *bvec;
        ++avec;
        ++bvec;
    }

    return sum0;
}
| |
@system unittest
{
    // @system due to dotProduct and assertCTFEable
    import std.exception : assertCTFEable;
    import std.meta : AliasSeq;
    foreach (T; AliasSeq!(double, const double, immutable double))
    {
        T[] a = [ 1.0, 2.0, ];
        T[] b = [ 4.0, 6.0, ];
        assert(dotProduct(a, b) == 16);
        assert(dotProduct([1, 3, -5], [4, -2, -1]) == 3);
    }

    // Make sure the unrolled loop codepath gets tested.
    // 18 elements exercise the 16-wide block plus the naive tail.
    static const x =
        [1.0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18];
    static const y =
        [2.0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19];
    assertCTFEable!({ assert(dotProduct(x, y) == 2280); });
}
| |
| /** |
| Computes the $(LINK2 https://en.wikipedia.org/wiki/Cosine_similarity, |
| cosine similarity) of input ranges $(D a) and $(D |
| b). The two ranges must have the same length. If both ranges define |
| length, the check is done once; otherwise, it is done at each |
| iteration. If either range has all-zero elements, return 0. |
| */ |
CommonType!(ElementType!(Range1), ElementType!(Range2))
cosineSimilarity(Range1, Range2)(Range1 a, Range2 b)
if (isInputRange!(Range1) && isInputRange!(Range2))
{
    // Accumulate both squared norms and the dot product in one pass.
    enum bool bothHaveLength = hasLength!(Range1) && hasLength!(Range2);
    static if (bothHaveLength) assert(a.length == b.length);
    Unqual!(typeof(return)) normA = 0, normB = 0, dot = 0;
    while (!a.empty)
    {
        immutable x = a.front;
        immutable y = b.front;
        normA += x * x;
        normB += y * y;
        dot += x * y;
        a.popFront();
        b.popFront();
    }
    static if (!bothHaveLength) assert(b.empty);
    // An all-zero vector has no direction; define the similarity as 0.
    if (normA == 0 || normB == 0) return 0;
    return dot / sqrt(normA * normB);
}
| |
@safe unittest
{
    import std.meta : AliasSeq;
    // dot = 4 + 6 = 10; |a|^2 = 5; |b|^2 = 25; similarity = 10/sqrt(125).
    foreach (T; AliasSeq!(double, const double, immutable double))
    {
        T[] a = [ 1.0, 2.0, ];
        T[] b = [ 4.0, 3.0, ];
        assert(approxEqual(
                    cosineSimilarity(a, b), 10.0 / sqrt(5.0 * 25),
                    0.01));
    }
}
| |
| /** |
| Normalizes values in $(D range) by multiplying each element with a |
| number chosen such that values sum up to $(D sum). If elements in $(D |
| range) sum to zero, assigns $(D sum / range.length) to |
| all. Normalization makes sense only if all elements in $(D range) are |
| positive. $(D normalize) assumes that is the case without checking it. |
| |
| Returns: $(D true) if normalization completed normally, $(D false) if |
| all elements in $(D range) were zero or if $(D range) is empty. |
| */ |
bool normalize(R)(R range, ElementType!(R) sum = 1)
if (isForwardRange!(R))
{
    ElementType!(R) s = 0;
    // Step 1: Compute sum and length of the range
    static if (hasLength!(R))
    {
        const length = range.length;
        foreach (e; range)
        {
            s += e;
        }
    }
    else
    {
        uint length = 0;
        foreach (e; range)
        {
            s += e;
            ++length;
        }
    }
    // Step 2: perform normalization
    if (s == 0)
    {
        if (length)
        {
            // Fixed: use the already-computed `length`, not `range.length`,
            // which does not exist (and fails to compile) for forward
            // ranges without a length property.
            immutable f = sum / length;
            foreach (ref e; range) e = f;
        }
        return false;
    }
    // The path most traveled
    assert(s >= 0);
    immutable f = sum / s;
    foreach (ref e; range)
        e *= f;
    return true;
}
| |
| /// |
@safe unittest
{
    // Empty range: nothing to normalize, returns false.
    double[] a = [];
    assert(!normalize(a));
    a = [ 1.0, 3.0 ];
    assert(normalize(a));
    assert(a == [ 0.25, 0.75 ]);
    // All-zero range: elements are set to sum/length, still returns false.
    a = [ 0.0, 0.0 ];
    assert(!normalize(a));
    assert(a == [ 0.5, 0.5 ]);
}
| |
| /** |
| Compute the sum of binary logarithms of the input range $(D r). |
| The error of this method is much smaller than with a naive sum of log2. |
| */ |
ElementType!Range sumOfLog2s(Range)(Range r)
if (isInputRange!Range && isFloatingPoint!(ElementType!Range))
{
    // Track the binary exponents separately from the product of mantissas,
    // keeping the running product within [0.5, 1) so it never overflows
    // or underflows; this is what makes the result accurate.
    long exponentSum = 0;
    Unqual!(typeof(return)) mantissaProduct = 1;
    foreach (value; r)
    {
        // log2 of a negative number is undefined.
        if (value < 0)
            return typeof(return).nan;
        int valueExp = void;
        mantissaProduct *= frexp(value, valueExp);
        exponentSum += valueExp;
        if (mantissaProduct < 0.5)
        {
            mantissaProduct *= 2;
            exponentSum--;
        }
    }
    return exponentSum + log2(mantissaProduct);
}
| |
| /// |
@safe unittest
{
    import std.math : isNaN;

    // Edge cases: empty input, zeros (-infinity), negatives and NaN
    // (NaN result), infinity, and an exact-power-of-two product.
    assert(sumOfLog2s(new double[0]) == 0);
    assert(sumOfLog2s([0.0L]) == -real.infinity);
    assert(sumOfLog2s([-0.0L]) == -real.infinity);
    assert(sumOfLog2s([2.0L]) == 1);
    assert(sumOfLog2s([-2.0L]).isNaN());
    assert(sumOfLog2s([real.nan]).isNaN());
    assert(sumOfLog2s([-real.nan]).isNaN());
    assert(sumOfLog2s([real.infinity]) == real.infinity);
    assert(sumOfLog2s([-real.infinity]).isNaN());
    assert(sumOfLog2s([ 0.25, 0.25, 0.25, 0.125 ]) == -9);
}
| |
| /** |
| Computes $(LINK2 https://en.wikipedia.org/wiki/Entropy_(information_theory), |
| _entropy) of input range $(D r) in bits. This |
| function assumes (without checking) that the values in $(D r) are all |
| in $(D [0, 1]). For the entropy to be meaningful, often $(D r) should |
| be normalized too (i.e., its values should sum to 1). The |
| two-parameter version stops evaluating as soon as the intermediate |
| result is greater than or equal to $(D max). |
| */ |
ElementType!Range entropy(Range)(Range r)
if (isInputRange!Range)
{
    // Sum of -p*log2(p); zero probabilities contribute nothing
    // (lim p->0 of p*log2(p) is 0).
    Unqual!(typeof(return)) acc = 0.0;
    while (!r.empty)
    {
        immutable p = r.front;
        if (p)
            acc -= p * log2(p);
        r.popFront();
    }
    return acc;
}
| |
| /// Ditto |
ElementType!Range entropy(Range, F)(Range r, F max)
if (isInputRange!Range &&
    !is(CommonType!(ElementType!Range, F) == void))
{
    // Same as the single-argument overload, but bail out as soon as the
    // partial sum reaches max.
    Unqual!(typeof(return)) acc = 0.0;
    while (!r.empty)
    {
        immutable p = r.front;
        if (p)
        {
            acc -= p * log2(p);
            if (acc >= max)
                break;
        }
        r.popFront();
    }
    return acc;
}
| |
@safe unittest
{
    import std.meta : AliasSeq;
    // A deterministic distribution has zero entropy; a uniform one over
    // four outcomes has 2 bits; capping at 1 stops the sum early.
    foreach (T; AliasSeq!(double, const double, immutable double))
    {
        T[] p = [ 0.0, 0, 0, 1 ];
        assert(entropy(p) == 0);
        p = [ 0.25, 0.25, 0.25, 0.25 ];
        assert(entropy(p) == 2);
        assert(entropy(p, 1) == 1);
    }
}
| |
| /** |
| Computes the $(LINK2 https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence, |
| Kullback-Leibler divergence) between input ranges |
| $(D a) and $(D b), which is the sum $(D ai * log(ai / bi)). The base |
| of logarithm is 2. The ranges are assumed to contain elements in $(D |
| [0, 1]). Usually the ranges are normalized probability distributions, |
| but this is not required or checked by $(D |
| kullbackLeiblerDivergence). If any element $(D bi) is zero and the |
| corresponding element $(D ai) nonzero, returns infinity. (Otherwise, |
| if $(D ai == 0 && bi == 0), the term $(D ai * log(ai / bi)) is |
| considered zero.) If the inputs are normalized, the result is |
| positive. |
| */ |
CommonType!(ElementType!Range1, ElementType!Range2)
kullbackLeiblerDivergence(Range1, Range2)(Range1 a, Range2 b)
if (isInputRange!(Range1) && isInputRange!(Range2))
{
    enum bool bothHaveLength = hasLength!(Range1) && hasLength!(Range2);
    static if (bothHaveLength) assert(a.length == b.length);
    Unqual!(typeof(return)) divergence = 0;
    while (!a.empty)
    {
        immutable p = a.front;
        // p == 0 contributes zero regardless of q.
        if (p != 0)
        {
            immutable q = b.front;
            // A nonzero p against a zero q makes the divergence infinite.
            if (q == 0) return divergence.infinity;
            assert(p > 0 && q > 0);
            divergence += p * log2(p / q);
        }
        a.popFront();
        b.popFront();
    }
    static if (!bothHaveLength) assert(b.empty);
    return divergence;
}
| |
| /// |
@safe unittest
{
    import std.math : approxEqual;

    // KL of a distribution against itself is 0; a nonzero term against a
    // zero reference term yields infinity; asymmetric otherwise.
    double[] p = [ 0.0, 0, 0, 1 ];
    assert(kullbackLeiblerDivergence(p, p) == 0);
    double[] p1 = [ 0.25, 0.25, 0.25, 0.25 ];
    assert(kullbackLeiblerDivergence(p1, p1) == 0);
    assert(kullbackLeiblerDivergence(p, p1) == 2);
    assert(kullbackLeiblerDivergence(p1, p) == double.infinity);
    double[] p2 = [ 0.2, 0.2, 0.2, 0.4 ];
    assert(approxEqual(kullbackLeiblerDivergence(p1, p2), 0.0719281));
    assert(approxEqual(kullbackLeiblerDivergence(p2, p1), 0.0780719));
}
| |
| /** |
| Computes the $(LINK2 https://en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence, |
| Jensen-Shannon divergence) between $(D a) and $(D |
| b), which is the sum $(D (ai * log(2 * ai / (ai + bi)) + bi * log(2 * |
| bi / (ai + bi))) / 2). The base of logarithm is 2. The ranges are |
| assumed to contain elements in $(D [0, 1]). Usually the ranges are |
| normalized probability distributions, but this is not required or |
| checked by $(D jensenShannonDivergence). If the inputs are normalized, |
| the result is bounded within $(D [0, 1]). The three-parameter version |
| stops evaluations as soon as the intermediate result is greater than |
| or equal to $(D limit). |
| */ |
CommonType!(ElementType!Range1, ElementType!Range2)
jensenShannonDivergence(Range1, Range2)(Range1 a, Range2 b)
if (isInputRange!Range1 && isInputRange!Range2 &&
    is(CommonType!(ElementType!Range1, ElementType!Range2)))
{
    enum bool bothHaveLength = hasLength!(Range1) && hasLength!(Range2);
    static if (bothHaveLength) assert(a.length == b.length);
    // Each pair contributes p*log2(p/m) + q*log2(q/m) with m the midpoint
    // (p+q)/2; the factor of 1/2 is applied once on return.
    Unqual!(typeof(return)) total = 0;
    while (!a.empty)
    {
        immutable p = a.front;
        immutable q = b.front;
        immutable mid = (p + q) / 2;
        if (p != 0)
            total += p * log2(p / mid);
        if (q != 0)
            total += q * log2(q / mid);
        a.popFront();
        b.popFront();
    }
    static if (!bothHaveLength) assert(b.empty);
    return total / 2;
}
| |
| /// Ditto |
CommonType!(ElementType!Range1, ElementType!Range2)
jensenShannonDivergence(Range1, Range2, F)(Range1 a, Range2 b, F limit)
if (isInputRange!Range1 && isInputRange!Range2 &&
    is(typeof(CommonType!(ElementType!Range1, ElementType!Range2).init
    >= F.init) : bool))
{
    enum bool bothHaveLength = hasLength!(Range1) && hasLength!(Range2);
    static if (bothHaveLength) assert(a.length == b.length);
    Unqual!(typeof(return)) total = 0;
    // The result is halved on return, so double the limit once here
    // instead of halving the accumulator inside the loop.
    limit *= 2;
    while (!a.empty)
    {
        immutable p = a.front;
        immutable q = b.front;
        immutable mid = (p + q) / 2;
        if (p != 0)
            total += p * log2(p / mid);
        if (q != 0)
            total += q * log2(q / mid);
        if (total >= limit)
            break;
        a.popFront();
        b.popFront();
    }
    static if (!bothHaveLength) assert(b.empty);
    return total / 2;
}
| |
| /// |
@safe unittest
{
    import std.math : approxEqual;

    // JS divergence is zero for identical distributions, symmetric in its
    // arguments, and the limited overload stops once the partial sum
    // reaches the cap.
    double[] p = [ 0.0, 0, 0, 1 ];
    assert(jensenShannonDivergence(p, p) == 0);
    double[] p1 = [ 0.25, 0.25, 0.25, 0.25 ];
    assert(jensenShannonDivergence(p1, p1) == 0);
    assert(approxEqual(jensenShannonDivergence(p1, p), 0.548795));
    double[] p2 = [ 0.2, 0.2, 0.2, 0.4 ];
    assert(approxEqual(jensenShannonDivergence(p1, p2), 0.0186218));
    assert(approxEqual(jensenShannonDivergence(p2, p1), 0.0186218));
    assert(approxEqual(jensenShannonDivergence(p2, p1, 0.005), 0.00602366));
}
| |
| /** |
| The so-called "all-lengths gap-weighted string kernel" computes a |
| similarity measure between $(D s) and $(D t) based on all of their |
| common subsequences of all lengths. Gapped subsequences are also |
| included. |
| |
| To understand what $(D gapWeightedSimilarity(s, t, lambda)) computes, |
| consider first the case $(D lambda = 1) and the strings $(D s = |
| ["Hello", "brave", "new", "world"]) and $(D t = ["Hello", "new", |
| "world"]). In that case, $(D gapWeightedSimilarity) counts the |
| following matches: |
| |
| $(OL $(LI three matches of length 1, namely $(D "Hello"), $(D "new"), |
| and $(D "world");) $(LI three matches of length 2, namely ($(D |
| "Hello", "new")), ($(D "Hello", "world")), and ($(D "new", "world"));) |
| $(LI one match of length 3, namely ($(D "Hello", "new", "world")).)) |
| |
| The call $(D gapWeightedSimilarity(s, t, 1)) simply counts all of |
| these matches and adds them up, returning 7. |
| |
| ---- |
| string[] s = ["Hello", "brave", "new", "world"]; |
| string[] t = ["Hello", "new", "world"]; |
| assert(gapWeightedSimilarity(s, t, 1) == 7); |
| ---- |
| |
| Note how the gaps in matching are simply ignored, for example ($(D |
| "Hello", "new")) is deemed as good a match as ($(D "new", |
| "world")). This may be too permissive for some applications. To |
| eliminate gapped matches entirely, use $(D lambda = 0): |
| |
| ---- |
| string[] s = ["Hello", "brave", "new", "world"]; |
| string[] t = ["Hello", "new", "world"]; |
| assert(gapWeightedSimilarity(s, t, 0) == 4); |
| ---- |
| |
| The call above eliminated the gapped matches ($(D "Hello", "new")), |
| ($(D "Hello", "world")), and ($(D "Hello", "new", "world")) from the |
| tally. That leaves only 4 matches. |
| |
| The most interesting case is when gapped matches still participate in |
| the result, but not as strongly as ungapped matches. The result will |
| be a smooth, fine-grained similarity measure between the input |
| strings. This is where values of $(D lambda) between 0 and 1 enter |
| into play: gapped matches are $(I exponentially penalized with the |
| number of gaps) with base $(D lambda). This means that an ungapped |
| match adds 1 to the return value; a match with one gap in either |
| string adds $(D lambda) to the return value; ...; a match with a total |
| of $(D n) gaps in both strings adds $(D pow(lambda, n)) to the return |
| value. In the example above, we have 4 matches without gaps, 2 matches |
| with one gap, and 1 match with three gaps. The latter match is ($(D |
| "Hello", "world")), which has two gaps in the first string and one gap |
| in the second string, totaling to three gaps. Summing these up we get |
| $(D 4 + 2 * lambda + pow(lambda, 3)). |
| |
| ---- |
| string[] s = ["Hello", "brave", "new", "world"]; |
| string[] t = ["Hello", "new", "world"]; |
| assert(gapWeightedSimilarity(s, t, 0.5) == 4 + 0.5 * 2 + 0.125); |
| ---- |
| |
| $(D gapWeightedSimilarity) is useful wherever a smooth similarity |
| measure between sequences allowing for approximate matches is |
| needed. The examples above are given with words, but any sequences |
| with elements comparable for equality are allowed, e.g. characters or |
| numbers. $(D gapWeightedSimilarity) uses a highly optimized dynamic |
| programming implementation that needs $(D 16 * min(s.length, |
| t.length)) extra bytes of memory and $(BIGOH s.length * t.length) time |
| to complete. |
| */ |
F gapWeightedSimilarity(alias comp = "a == b", R1, R2, F)(R1 s, R2 t, F lambda)
if (isRandomAccessRange!(R1) && hasLength!(R1) &&
    isRandomAccessRange!(R2) && hasLength!(R2))
{
    import core.exception : onOutOfMemoryError;
    import core.stdc.stdlib : malloc, free;
    import std.algorithm.mutation : swap;
    import std.functional : binaryFun;

    // Arrange for t to be the shorter range so the DP rows (sized by
    // t.length) use the least memory.
    if (s.length < t.length) return gapWeightedSimilarity(t, s, lambda);
    if (!t.length) return 0;

    // Two rows of the dynamic-programming table, allocated as one block
    // on the C heap. They are swapped each outer iteration, so free()
    // must target whichever pointer currently holds the block's start.
    auto block = cast(F*) malloc(F.sizeof * 2 * t.length);
    if (!block)
        onOutOfMemoryError();

    auto prevRow = block;
    auto currRow = block + t.length;
    scope(exit) free(prevRow < currRow ? prevRow : currRow);
    prevRow[0 .. t.length] = 0;
    currRow[0] = 0;
    immutable lambdaSq = lambda * lambda;

    F total = 0;
    foreach (i; 0 .. s.length)
    {
        const si = s[i];
        size_t j = 0;
        for (;;)
        {
            // Contribution of a match ending at (i, j); zero on mismatch.
            F cell = 0;
            if (binaryFun!(comp)(si, t[j]))
            {
                cell = 1 + prevRow[j];
                total += cell;
            }
            immutable jNext = j + 1;
            if (jNext == t.length) break;
            // Rousu et al. recurrence: propagate gapped contributions.
            currRow[jNext] = cell + lambda * (currRow[j] + prevRow[jNext]) -
                             lambdaSq * prevRow[j];
            j = jNext;
        }
        swap(prevRow, currRow);
    }
    return total;
}
| |
// Checks the three documented regimes: lambda = 1 (count all gapped
// matches), lambda = 0 (ungapped only), and a fractional penalty.
@system unittest
{
    string[] s = ["Hello", "brave", "new", "world"];
    string[] t = ["Hello", "new", "world"];
    assert(gapWeightedSimilarity(s, t, 1) == 7);
    assert(gapWeightedSimilarity(s, t, 0) == 4);
    assert(gapWeightedSimilarity(s, t, 0.5) == 4 + 2 * 0.5 + 0.125);
}
| |
| /** |
| The similarity per $(D gapWeightedSimilarity) has an issue in that it |
| grows with the lengths of the two strings, even though the strings are |
| not actually very similar. For example, the range $(D ["Hello", |
| "world"]) is increasingly similar with the range $(D ["Hello", |
| "world", "world", "world",...]) as more instances of $(D "world") are |
| appended. To prevent that, $(D gapWeightedSimilarityNormalized) |
| computes a normalized version of the similarity that is computed as |
$(D gapWeightedSimilarity(s, t, lambda) /
sqrt(gapWeightedSimilarity(s, s, lambda) * gapWeightedSimilarity(t, t,
lambda))). The function $(D gapWeightedSimilarityNormalized) (a
| so-called normalized kernel) is bounded in $(D [0, 1]), reaches $(D 0) |
| only for ranges that don't match in any position, and $(D 1) only for |
| identical ranges. |
| |
| The optional parameters $(D sSelfSim) and $(D tSelfSim) are meant for |
| avoiding duplicate computation. Many applications may have already |
| computed $(D gapWeightedSimilarity(s, s, lambda)) and/or $(D |
| gapWeightedSimilarity(t, t, lambda)). In that case, they can be passed |
| as $(D sSelfSim) and $(D tSelfSim), respectively. |
| */ |
Select!(isFloatingPoint!(F), F, double)
gapWeightedSimilarityNormalized(alias comp = "a == b", R1, R2, F)
        (R1 s, R2 t, F lambda, F sSelfSim = F.init, F tSelfSim = F.init)
if (isRandomAccessRange!(R1) && hasLength!(R1) &&
    isRandomAccessRange!(R2) && hasLength!(R2))
{
    // A self-similarity left at F.init (NaN for floating point) means the
    // caller did not supply a precomputed value.
    static bool suppliedByCaller(F value)
    {
        static if (isFloatingPoint!(F))
            return !isNaN(value);
        else
            return value != value.init;
    }

    if (!suppliedByCaller(sSelfSim))
        sSelfSim = gapWeightedSimilarity!(comp)(s, s, lambda);
    if (sSelfSim == 0) return 0;   // s shares nothing even with itself
    if (!suppliedByCaller(tSelfSim))
        tSelfSim = gapWeightedSimilarity!(comp)(t, t, lambda);
    if (tSelfSim == 0) return 0;

    // Normalize the cross-similarity by the geometric mean of the two
    // self-similarities, yielding a value in [0, 1].
    immutable selfProduct = cast(typeof(return)) sSelfSim * tSelfSim;
    return gapWeightedSimilarity!(comp)(s, t, lambda) / sqrt(selfProduct);
}
| |
| /// |
| @system unittest |
| { |
| import std.math : approxEqual, sqrt; |
| |
| string[] s = ["Hello", "brave", "new", "world"]; |
| string[] t = ["Hello", "new", "world"]; |
| assert(gapWeightedSimilarity(s, s, 1) == 15); |
| assert(gapWeightedSimilarity(t, t, 1) == 7); |
| assert(gapWeightedSimilarity(s, t, 1) == 7); |
| assert(approxEqual(gapWeightedSimilarityNormalized(s, t, 1), |
| 7.0 / sqrt(15.0 * 7), 0.01)); |
| } |
| |
| /** |
| Similar to $(D gapWeightedSimilarity), just works in an incremental |
| manner by first revealing the matches of length 1, then gapped matches |
of length 2, and so on. The memory requirement is $(BIGOH s.length *
t.length). The time complexity is $(BIGOH s.length * t.length) time
for computing each step. The example below continues the previous ones.
| |
| The implementation is based on the pseudocode in Fig. 4 of the paper |
| $(HTTP jmlr.csail.mit.edu/papers/volume6/rousu05a/rousu05a.pdf, |
| "Efficient Computation of Gapped Substring Kernels on Large Alphabets") |
| by Rousu et al., with additional algorithmic and systems-level |
| optimizations. |
| */ |
struct GapWeightedSimilarityIncremental(Range, F = double)
if (isRandomAccessRange!(Range) && hasLength!(Range))
{
    import core.stdc.stdlib : malloc, realloc, alloca, free;

private:
    Range s, t;                       // trimmed to the span containing matches
    F currentValue = 0;               // similarity tally at the current length
    F* kl;                            // s.length x t.length DP matrix (C heap)
    size_t gram = void;               // current match length minus one
    F lambda = void, lambda2 = void;  // gap penalty and its square (cached)

public:
/**
Constructs an object given two ranges $(D s) and $(D t) and a penalty
$(D lambda). Constructor completes in $(BIGOH s.length * t.length)
time and computes all matches of length 1.
*/
    this(Range s, Range t, F lambda)
    {
        import core.exception : onOutOfMemoryError;

        assert(lambda > 0);
        this.gram = 0;
        this.lambda = lambda;
        this.lambda2 = lambda * lambda; // for efficiency only

        size_t iMin = size_t.max, jMin = size_t.max,
            iMax = 0, jMax = 0;
        /* initialize */
        // Scratch list of (i, j) coordinates of all length-1 matches;
        // grown with realloc and released on scope exit.
        Tuple!(size_t, size_t) * k0;
        size_t k0len;
        scope(exit) free(k0);
        currentValue = 0;
        foreach (i, si; s)
        {
            foreach (j; 0 .. t.length)
            {
                if (si != t[j]) continue;
                k0 = cast(typeof(k0)) realloc(k0, ++k0len * (*k0).sizeof);
                with (k0[k0len - 1])
                {
                    field[0] = i;
                    field[1] = j;
                }
                // Maintain the minimum and maximum i and j
                if (iMin > i) iMin = i;
                if (iMax < i) iMax = i;
                if (jMin > j) jMin = j;
                if (jMax < j) jMax = j;
            }
        }

        // No matches at all: kl stays null, so empty() reports true.
        if (iMin > iMax) return;
        assert(k0len);

        currentValue = k0len;
        // Chop strings down to the useful sizes
        s = s[iMin .. iMax + 1];
        t = t[jMin .. jMax + 1];
        this.s = s;
        this.t = t;

        kl = cast(F*) malloc(s.length * t.length * F.sizeof);
        if (!kl)
            onOutOfMemoryError();

        kl[0 .. s.length * t.length] = 0;
        // Seed the DP matrix: each length-1 match contributes lambda^2.
        foreach (pos; 0 .. k0len)
        {
            with (k0[pos])
            {
                kl[(field[0] - iMin) * t.length + field[1] -jMin] = lambda2;
            }
        }
    }

    /**
    Returns: $(D this).
    */
    ref GapWeightedSimilarityIncremental opSlice()
    {
        return this;
    }

    /**
    Computes the match of the next length. Completes in $(BIGOH s.length *
    t.length) time.
    */
    void popFront()
    {
        import std.algorithm.mutation : swap;

        // This is a large source of optimization: if similarity at
        // the gram-1 level was 0, then we can safely assume
        // similarity at the gram level is 0 as well.
        if (empty) return;

        // Now attempt to match gapped substrings of length `gram'
        ++gram;
        currentValue = 0;

        // One row of the suffix-sum table S, kept off the GC heap.
        auto Si = cast(F*) alloca(t.length * F.sizeof);
        Si[0 .. t.length] = 0;
        foreach (i; 0 .. s.length)
        {
            const si = s[i];
            F Sij_1 = 0;
            F Si_1j_1 = 0;
            auto kli = kl + i * t.length;
            for (size_t j = 0;;)
            {
                const klij = kli[j];
                const Si_1j = Si[j];
                // Rousu et al. Fig. 4 recurrence for the suffix sums.
                const tmp = klij + lambda * (Si_1j + Sij_1) - lambda2 * Si_1j_1;
                // now update kl and currentValue
                if (si == t[j])
                    currentValue += kli[j] = lambda2 * Si_1j_1;
                else
                    kli[j] = 0;
                // commit to Si
                Si[j] = tmp;
                if (++j == t.length) break;
                // get ready for the next step; virtually increment j,
                // so essentially stuff_j_1 <-- stuff_j
                Si_1j_1 = Si_1j;
                Sij_1 = tmp;
            }
        }
        currentValue /= pow(lambda, 2 * (gram + 1));

        // Dead code retained for reference: an earlier perimeter-limited
        // variant of the same recurrence.
        version (none)
        {
            Si_1[0 .. t.length] = 0;
            kl[0 .. min(t.length, maxPerimeter + 1)] = 0;
            foreach (i; 1 .. min(s.length, maxPerimeter + 1))
            {
                auto kli = kl + i * t.length;
                assert(s.length > i);
                const si = s[i];
                auto kl_1i_1 = kl_1 + (i - 1) * t.length;
                kli[0] = 0;
                F lastS = 0;
                foreach (j; 1 .. min(maxPerimeter - i + 1, t.length))
                {
                    immutable j_1 = j - 1;
                    immutable tmp = kl_1i_1[j_1]
                        + lambda * (Si_1[j] + lastS)
                        - lambda2 * Si_1[j_1];
                    kl_1i_1[j_1] = float.nan;
                    Si_1[j_1] = lastS;
                    lastS = tmp;
                    if (si == t[j])
                    {
                        currentValue += kli[j] = lambda2 * lastS;
                    }
                    else
                    {
                        kli[j] = 0;
                    }
                }
                Si_1[t.length - 1] = lastS;
            }
            currentValue /= pow(lambda, 2 * (gram + 1));
            // get ready for the popFront computation
            swap(kl, kl_1);
        }
    }

    /**
    Returns: The gapped similarity at the current match length (initially
    1, grows with each call to $(D popFront)).
    */
    @property F front() { return currentValue; }

    /**
    Returns: Whether there are more matches.
    */
    @property bool empty()
    {
        if (currentValue) return false;
        // Once exhausted, eagerly release the DP matrix. NOTE(review): kl
        // is only freed here; abandoning the range before it is drained
        // leaks the buffer — confirm callers always iterate to completion.
        if (kl)
        {
            free(kl);
            kl = null;
        }
        return true;
    }
}
| |
| /** |
| Ditto |
| */ |
| GapWeightedSimilarityIncremental!(R, F) gapWeightedSimilarityIncremental(R, F) |
| (R r1, R r2, F penalty) |
| { |
| return typeof(return)(r1, r2, penalty); |
| } |
| |
| /// |
| @system unittest |
| { |
| string[] s = ["Hello", "brave", "new", "world"]; |
| string[] t = ["Hello", "new", "world"]; |
| auto simIter = gapWeightedSimilarityIncremental(s, t, 1.0); |
| assert(simIter.front == 3); // three 1-length matches |
| simIter.popFront(); |
| assert(simIter.front == 3); // three 2-length matches |
| simIter.popFront(); |
| assert(simIter.front == 1); // one 3-length match |
| simIter.popFront(); |
| assert(simIter.empty); // no more match |
| } |
| |
// Edge cases: no match at all, single-element ranges, unequal lengths,
// and a gapped match with a fractional penalty.
@system unittest
{
    import std.conv : text;
    string[] s = ["Hello", "brave", "new", "world"];
    string[] t = ["Hello", "new", "world"];
    auto simIter = gapWeightedSimilarityIncremental(s, t, 1.0);
    //foreach (e; simIter) writeln(e);
    assert(simIter.front == 3); // three 1-length matches
    simIter.popFront();
    assert(simIter.front == 3, text(simIter.front)); // three 2-length matches
    simIter.popFront();
    assert(simIter.front == 1); // one 3-length matches
    simIter.popFront();
    assert(simIter.empty);     // no more match

    s = ["Hello"];
    t = ["bye"];
    simIter = gapWeightedSimilarityIncremental(s, t, 0.5);
    assert(simIter.empty);

    s = ["Hello"];
    t = ["Hello"];
    simIter = gapWeightedSimilarityIncremental(s, t, 0.5);
    assert(simIter.front == 1); // one match
    simIter.popFront();
    assert(simIter.empty);

    s = ["Hello", "world"];
    t = ["Hello"];
    simIter = gapWeightedSimilarityIncremental(s, t, 0.5);
    assert(simIter.front == 1); // one match
    simIter.popFront();
    assert(simIter.empty);

    s = ["Hello", "world"];
    t = ["Hello", "yah", "world"];
    simIter = gapWeightedSimilarityIncremental(s, t, 0.5);
    assert(simIter.front == 2); // two 1-gram matches
    simIter.popFront();
    assert(simIter.front == 0.5, text(simIter.front)); // one 2-gram match, 1 gap
}
| |
// Verifies full iteration against precomputed witness sequences, including
// a case with repeated tokens in one of the inputs.
@system unittest
{
    GapWeightedSimilarityIncremental!(string[]) sim =
        GapWeightedSimilarityIncremental!(string[])(
            ["nyuk", "I", "have", "no", "chocolate", "giba"],
            ["wyda", "I", "have", "I", "have", "have", "I", "have", "hehe"],
            0.5);
    double[] witness = [ 7.0, 4.03125, 0, 0 ];
    foreach (e; sim)
    {
        //writeln(e);
        assert(e == witness.front);
        witness.popFront();
    }
    witness = [ 3.0, 1.3125, 0.25 ];
    sim = GapWeightedSimilarityIncremental!(string[])(
        ["I", "have", "no", "chocolate"],
        ["I", "have", "some", "chocolate"],
        0.5);
    foreach (e; sim)
    {
        //writeln(e);
        assert(e == witness.front);
        witness.popFront();
    }
    assert(witness.empty);
}
| |
| /** |
| Computes the greatest common divisor of $(D a) and $(D b) by using |
| an efficient algorithm such as $(HTTPS en.wikipedia.org/wiki/Euclidean_algorithm, Euclid's) |
| or $(HTTPS en.wikipedia.org/wiki/Binary_GCD_algorithm, Stein's) algorithm. |
| |
| Params: |
| T = Any numerical type that supports the modulo operator `%`. If |
| bit-shifting `<<` and `>>` are also supported, Stein's algorithm will |
| be used; otherwise, Euclid's algorithm is used as _a fallback. |
| Returns: |
| The greatest common divisor of the given arguments. |
| */ |
T gcd(T)(T a, T b)
if (isIntegral!T)
{
    // Strip const/immutable so the iterative algorithms below may mutate
    // their local copies.
    static if (is(T == const) || is(T == immutable))
    {
        return gcd!(Unqual!T)(a, b);
    }
    else version (DigitalMars)
    {
        // Euclidean algorithm. Negative inputs are not supported for
        // signed types; the result would be implementation-defined.
        static if (T.min < 0)
        {
            assert(a >= 0 && b >= 0);
        }
        while (b)
        {
            immutable t = b;
            b = a % b;
            a = t;
        }
        return a;
    }
    else
    {
        // Stein's (binary) algorithm. Same contract as the Euclidean
        // branch: negative inputs of signed types are rejected. (This
        // assert was previously missing here, making the two branches
        // behave inconsistently on invalid input.)
        static if (T.min < 0)
        {
            assert(a >= 0 && b >= 0);
        }
        if (a == 0)
            return b;
        if (b == 0)
            return a;

        import core.bitop : bsf;
        import std.algorithm.mutation : swap;

        // Factors of two common to both inputs belong in the result;
        // factors unique to `a` do not, so shift them all out of `a`.
        immutable uint shift = bsf(a | b);
        a >>= a.bsf;

        do
        {
            // Invariant: `a` is odd here; strip `b`'s factors of two, then
            // subtract the smaller from the larger (preserves the gcd).
            b >>= b.bsf;
            if (a > b)
                swap(a, b);
            b -= a;
        } while (b);

        return a << shift;
    }
}
| |
| /// |
| @safe unittest |
| { |
| assert(gcd(2 * 5 * 7 * 7, 5 * 7 * 11) == 5 * 7); |
| const int a = 5 * 13 * 23 * 23, b = 13 * 59; |
| assert(gcd(a, b) == 13); |
| } |
| |
| // This overload is for non-builtin numerical types like BigInt or |
| // user-defined types. |
| /// ditto |
T gcd(T)(T a, T b)
if (!isIntegral!T &&
    is(typeof(T.init % T.init)) &&
    is(typeof(T.init == 0 || T.init > 0)))
{
    import std.algorithm.mutation : swap;

    // Stein's algorithm needs shifting, subtraction, bit-masking and
    // swapping; probe for them at compile time.
    enum canUseBinaryGcd = is(typeof(() {
        T t, u;
        t <<= 1;
        t >>= 1;
        t -= u;
        bool b = (t & 1) == 0;
        swap(t, u);
    }));

    assert(a >= 0 && b >= 0);

    // Special cases: gcd(0, x) == x and gcd(x, 0) == x. Handling them up
    // front also keeps the binary branch's shift loop from spinning
    // forever when both inputs are zero.
    if (a == 0)
        return b;
    if (b == 0)
        return a;

    static if (canUseBinaryGcd)
    {
        // Stein's (binary) GCD: extract the common factors of two, then
        // repeatedly subtract the smaller odd value from the larger.
        uint shift = 0;
        while ((a & 1) == 0 && (b & 1) == 0)
        {
            a >>= 1;
            b >>= 1;
            shift++;
        }

        // At least one of the two is odd now; the loop below requires it
        // to be `a`. (This swap was missing, so the assert below fired in
        // debug builds whenever `a` had more trailing two-factors than
        // `b`, e.g. gcd(4, 6).)
        if ((a & 1) == 0)
            swap(a, b);

        do
        {
            assert((a & 1) != 0);
            while ((b & 1) == 0)
                b >>= 1;
            if (a > b)
                swap(a, b);
            b -= a;
        } while (b);

        return a << shift;
    }
    else
    {
        // The only thing we have is %; fallback to Euclidean algorithm.
        while (b != 0)
        {
            auto t = b;
            b = a % b;
            a = t;
        }
        return a;
    }
}
| |
// Issue 7102: gcd must accept non-builtin numeric types such as BigInt
// via the non-integral overload.
@system pure unittest
{
    import std.bigint : BigInt;
    assert(gcd(BigInt("71_000_000_000_000_000_000"),
               BigInt("31_000_000_000_000_000_000")) ==
           BigInt("1_000_000_000_000_000_000"));
}
| |
@safe pure nothrow unittest
{
    // A numerical type that only supports % and - (to force gcd implementation
    // to use Euclidean algorithm).
    struct CrippledInt
    {
        int impl;
        CrippledInt opBinary(string op : "%")(CrippledInt i)
        {
            return CrippledInt(impl % i.impl);
        }
        int opEquals(CrippledInt i) { return impl == i.impl; }
        int opEquals(int i) { return impl == i; }
        // opCmp against int supports the `a >= 0` precondition check.
        int opCmp(int i) { return (impl < i) ? -1 : (impl > i) ? 1 : 0; }
    }
    assert(gcd(CrippledInt(2310), CrippledInt(1309)) == CrippledInt(77));
}
| |
// This is to make tweaking the speed/size vs. accuracy tradeoff easy,
// though floats seem accurate enough for all practical purposes, since
// they pass the "approxEqual(inverseFft(fft(arr)), arr)" test even for
// size 2 ^^ 22. (All Fft lookup tables and scratch buffers below use
// this element type.)
private alias lookup_t = float;
| |
| /**A class for performing fast Fourier transforms of power of two sizes. |
| * This class encapsulates a large amount of state that is reusable when |
| * performing multiple FFTs of sizes smaller than or equal to that specified |
| * in the constructor. This results in substantial speedups when performing |
| * multiple FFTs with a known maximum size. However, |
| * a free function API is provided for convenience if you need to perform a |
| * one-off FFT. |
| * |
| * References: |
| * $(HTTP en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm) |
| */ |
final class Fft
{
    import core.bitop : bsf;
    import std.algorithm.iteration : map;
    import std.array : uninitializedArray;

private:
    // Precomputed -sin tables: negSinLookup[bsf(n)] holds the table for an
    // FFT of size n, one row per power-of-two size up to `size`.
    immutable lookup_t[][] negSinLookup;

    // Asserts that `range` fits within the size this object was built for.
    void enforceSize(R)(R range) const
    {
        import std.conv : text;
        assert(range.length <= size, text(
            "FFT size mismatch. Expected ", size, ", got ", range.length));
    }

    // Recursive Cooley-Tukey decimation-in-time step over a strided view
    // of the input.
    void fftImpl(Ret, R)(Stride!R range, Ret buf) const
    in
    {
        assert(range.length >= 4);
        assert(isPowerOf2(range.length));
    }
    body
    {
        auto recurseRange = range;
        recurseRange.doubleSteps();

        if (buf.length > 4)
        {
            fftImpl(recurseRange, buf[0..$ / 2]);
            recurseRange.popHalf();
            fftImpl(recurseRange, buf[$ / 2..$]);
        }
        else
        {
            // Do this here instead of in another recursion to save on
            // recursion overhead.
            slowFourier2(recurseRange, buf[0..$ / 2]);
            recurseRange.popHalf();
            slowFourier2(recurseRange, buf[$ / 2..$]);
        }

        butterfly(buf);
    }

    // This algorithm works by performing the even and odd parts of our FFT
    // using the "two for the price of one" method mentioned at
    // http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM#Head521
    // by making the odd terms into the imaginary components of our new FFT,
    // and then using symmetry to recombine them.
    void fftImplPureReal(Ret, R)(R range, Ret buf) const
    in
    {
        assert(range.length >= 4);
        assert(isPowerOf2(range.length));
    }
    body
    {
        alias E = ElementType!R;

        // Converts odd indices of range to the imaginary components of
        // a range half the size. The even indices become the real components.
        static if (isArray!R && isFloatingPoint!E)
        {
            // Then the memory layout of complex numbers provides a dirt
            // cheap way to convert. This is a common case, so take advantage.
            auto oddsImag = cast(Complex!E[]) range;
        }
        else
        {
            // General case: Use a higher order range. We can assume
            // source.length is even because it has to be a power of 2.
            static struct OddToImaginary
            {
                R source;
                alias C = Complex!(CommonType!(E, typeof(buf[0].re)));

                @property
                {
                    C front()
                    {
                        return C(source[0], source[1]);
                    }

                    C back()
                    {
                        immutable n = source.length;
                        return C(source[n - 2], source[n - 1]);
                    }

                    typeof(this) save()
                    {
                        return typeof(this)(source.save);
                    }

                    bool empty()
                    {
                        return source.empty;
                    }

                    size_t length()
                    {
                        return source.length / 2;
                    }
                }

                void popFront()
                {
                    source.popFront();
                    source.popFront();
                }

                void popBack()
                {
                    source.popBack();
                    source.popBack();
                }

                C opIndex(size_t index)
                {
                    return C(source[index * 2], source[index * 2 + 1]);
                }

                typeof(this) opSlice(size_t lower, size_t upper)
                {
                    return typeof(this)(source[lower * 2 .. upper * 2]);
                }
            }

            auto oddsImag = OddToImaginary(range);
        }

        fft(oddsImag, buf[0..$ / 2]);
        auto evenFft = buf[0..$ / 2];
        auto oddFft = buf[$ / 2..$];
        immutable halfN = evenFft.length;
        oddFft[0].re = buf[0].im;
        oddFft[0].im = 0;
        evenFft[0].im = 0;
        // evenFft[0].re is already right b/c it's aliased with buf[0].re.

        // Split the half-size complex FFT back into the even- and
        // odd-index FFTs using conjugate symmetry.
        foreach (k; 1 .. halfN / 2 + 1)
        {
            immutable bufk = buf[k];
            immutable bufnk = buf[buf.length / 2 - k];
            evenFft[k].re = 0.5 * (bufk.re + bufnk.re);
            evenFft[halfN - k].re = evenFft[k].re;
            evenFft[k].im = 0.5 * (bufk.im - bufnk.im);
            evenFft[halfN - k].im = -evenFft[k].im;

            oddFft[k].re = 0.5 * (bufk.im + bufnk.im);
            oddFft[halfN - k].re = oddFft[k].re;
            oddFft[k].im = 0.5 * (bufnk.re - bufk.re);
            oddFft[halfN - k].im = -oddFft[k].im;
        }

        butterfly(buf);
    }

    // Recombines the two half-size sub-transforms held in buf into the
    // full-size transform (the Cooley-Tukey butterfly stage), using the
    // precomputed -sin lookup table for buf's size.
    void butterfly(R)(R buf) const
    in
    {
        assert(isPowerOf2(buf.length));
    }
    body
    {
        immutable n = buf.length;
        immutable localLookup = negSinLookup[bsf(n)];
        assert(localLookup.length == n);

        immutable cosMask = n - 1;
        immutable cosAdd = n / 4 * 3;

        lookup_t negSinFromLookup(size_t index) pure nothrow
        {
            return localLookup[index];
        }

        lookup_t cosFromLookup(size_t index) pure nothrow
        {
            // cos is just -sin shifted by PI * 3 / 2.
            return localLookup[(index + cosAdd) & cosMask];
        }

        immutable halfLen = n / 2;

        // This loop is unrolled and the two iterations are interleaved
        // relative to the textbook FFT to increase ILP. This gives roughly 5%
        // speedups on DMD.
        for (size_t k = 0; k < halfLen; k += 2)
        {
            immutable cosTwiddle1 = cosFromLookup(k);
            immutable sinTwiddle1 = negSinFromLookup(k);
            immutable cosTwiddle2 = cosFromLookup(k + 1);
            immutable sinTwiddle2 = negSinFromLookup(k + 1);

            immutable realLower1 = buf[k].re;
            immutable imagLower1 = buf[k].im;
            immutable realLower2 = buf[k + 1].re;
            immutable imagLower2 = buf[k + 1].im;

            immutable upperIndex1 = k + halfLen;
            immutable upperIndex2 = upperIndex1 + 1;
            immutable realUpper1 = buf[upperIndex1].re;
            immutable imagUpper1 = buf[upperIndex1].im;
            immutable realUpper2 = buf[upperIndex2].re;
            immutable imagUpper2 = buf[upperIndex2].im;

            immutable realAdd1 = cosTwiddle1 * realUpper1
                               - sinTwiddle1 * imagUpper1;
            immutable imagAdd1 = sinTwiddle1 * realUpper1
                               + cosTwiddle1 * imagUpper1;
            immutable realAdd2 = cosTwiddle2 * realUpper2
                               - sinTwiddle2 * imagUpper2;
            immutable imagAdd2 = sinTwiddle2 * realUpper2
                               + cosTwiddle2 * imagUpper2;

            buf[k].re += realAdd1;
            buf[k].im += imagAdd1;
            buf[k + 1].re += realAdd2;
            buf[k + 1].im += imagAdd2;

            buf[upperIndex1].re = realLower1 - realAdd1;
            buf[upperIndex1].im = imagLower1 - imagAdd1;
            buf[upperIndex2].re = realLower2 - realAdd2;
            buf[upperIndex2].im = imagLower2 - imagAdd2;
        }
    }

    // This constructor is used within this module for allocating the
    // buffer space elsewhere besides the GC heap. It's definitely **NOT**
    // part of the public API and definitely **IS** subject to change.
    //
    // Also, this is unsafe because the memSpace buffer will be cast
    // to immutable.
    //
    // memSpace must hold 2 * size elements; the lookup tables for size
    // and every smaller power of two are packed into it back-to-front.
    public this(lookup_t[] memSpace) // Public b/c of bug 4636.
    {
        immutable size = memSpace.length / 2;

        /* Create a lookup table of all negative sine values at a resolution of
         * size and all smaller power of two resolutions. This may seem
         * inefficient, but having all the lookups be next to each other in
         * memory at every level of iteration is a huge win performance-wise.
         */
        if (size == 0)
        {
            return;
        }

        assert(isPowerOf2(size),
            "Can only do FFTs on ranges with a size that is a power of two.");

        auto table = new lookup_t[][bsf(size) + 1];

        table[$ - 1] = memSpace[$ - size..$];
        memSpace = memSpace[0 .. size];

        auto lastRow = table[$ - 1];
        lastRow[0] = 0;  // -sin(0) == 0.
        foreach (ptrdiff_t i; 1 .. size)
        {
            // The hard coded cases are for improved accuracy and to prevent
            // annoying non-zeroness when stuff should be zero.

            if (i == size / 4)
                lastRow[i] = -1;  // -sin(pi / 2) == -1.
            else if (i == size / 2)
                lastRow[i] = 0;   // -sin(pi) == 0.
            else if (i == size * 3 / 4)
                lastRow[i] = 1;  // -sin(pi * 3 / 2) == 1
            else
                lastRow[i] = -sin(i * 2.0L * PI / size);
        }

        // Fill in all the other rows with strided versions.
        foreach (i; 1 .. table.length - 1)
        {
            immutable strideLength = size / (2 ^^ i);
            auto strided = Stride!(lookup_t[])(lastRow, strideLength);
            table[i] = memSpace[$ - strided.length..$];
            memSpace = memSpace[0..$ - strided.length];

            size_t copyIndex;
            foreach (elem; strided)
            {
                table[i][copyIndex++] = elem;
            }
        }

        negSinLookup = cast(immutable) table;
    }

public:
    /**Create an $(D Fft) object for computing fast Fourier transforms of
     * power of two sizes of $(D size) or smaller. $(D size) must be a
     * power of two.
     */
    this(size_t size)
    {
        // Allocate all twiddle factor buffers in one contiguous block so that,
        // when one is done being used, the next one is next in cache.
        auto memSpace = uninitializedArray!(lookup_t[])(2 * size);
        this(memSpace);
    }

    /// The maximum FFT size this object was constructed to support.
    @property size_t size() const
    {
        return (negSinLookup is null) ? 0 : negSinLookup[$ - 1].length;
    }

    /**Compute the Fourier transform of range using the $(BIGOH N log N)
     * Cooley-Tukey Algorithm. $(D range) must be a random-access range with
     * slicing and a length equal to $(D size) as provided at the construction of
     * this object. The contents of range can be either numeric types,
     * which will be interpreted as pure real values, or complex types with
     * properties or members $(D .re) and $(D .im) that can be read.
     *
     * Note: Pure real FFTs are automatically detected and the relevant
     * optimizations are performed.
     *
     * Returns: An array of complex numbers representing the transformed data in
     * the frequency domain.
     *
     * Conventions: The exponent is negative and the factor is one,
     * i.e., output[j] := sum[ exp(-2 PI i j k / N) input[k] ].
     */
    Complex!F[] fft(F = double, R)(R range) const
    if (isFloatingPoint!F && isRandomAccessRange!R)
    {
        enforceSize(range);
        Complex!F[] ret;
        if (range.length == 0)
        {
            return ret;
        }

        // Don't waste time initializing the memory for ret.
        ret = uninitializedArray!(Complex!F[])(range.length);

        fft(range, ret);
        return ret;
    }

    /**Same as the overload, but allows for the results to be stored in a user-
     * provided buffer. The buffer must be of the same length as range, must be
     * a random-access range, must have slicing, and must contain elements that are
     * complex-like. This means that they must have a .re and a .im member or
     * property that can be both read and written and are floating point numbers.
     */
    void fft(Ret, R)(R range, Ret buf) const
    if (isRandomAccessRange!Ret && isComplexLike!(ElementType!Ret) && hasSlicing!Ret)
    {
        assert(buf.length == range.length);
        enforceSize(range);

        // Sizes 0, 1 and 2 are trivial special cases; larger sizes pick
        // the pure-real or the general complex implementation.
        if (range.length == 0)
        {
            return;
        }
        else if (range.length == 1)
        {
            buf[0] = range[0];
            return;
        }
        else if (range.length == 2)
        {
            slowFourier2(range, buf);
            return;
        }
        else
        {
            alias E = ElementType!R;
            static if (is(E : real))
            {
                return fftImplPureReal(range, buf);
            }
            else
            {
                static if (is(R : Stride!R))
                    return fftImpl(range, buf);
                else
                    return fftImpl(Stride!R(range, 1), buf);
            }
        }
    }

    /**
     * Computes the inverse Fourier transform of a range. The range must be a
     * random access range with slicing, have a length equal to the size
     * provided at construction of this object, and contain elements that are
     * either of type std.complex.Complex or have essentially
     * the same compile-time interface.
     *
     * Returns: The time-domain signal.
     *
     * Conventions: The exponent is positive and the factor is 1/N, i.e.,
     * output[j] := (1 / N) sum[ exp(+2 PI i j k / N) input[k] ].
     */
    Complex!F[] inverseFft(F = double, R)(R range) const
    if (isRandomAccessRange!R && isComplexLike!(ElementType!R) && isFloatingPoint!F)
    {
        enforceSize(range);
        Complex!F[] ret;
        if (range.length == 0)
        {
            return ret;
        }

        // Don't waste time initializing the memory for ret.
        ret = uninitializedArray!(Complex!F[])(range.length);

        inverseFft(range, ret);
        return ret;
    }

    /**
     * Inverse FFT that allows a user-supplied buffer to be provided. The buffer
     * must be a random access range with slicing, and its elements
     * must be some complex-like type.
     */
    void inverseFft(Ret, R)(R range, Ret buf) const
    if (isRandomAccessRange!Ret && isComplexLike!(ElementType!Ret) && hasSlicing!Ret)
    {
        enforceSize(range);

        // Inverse transform == forward transform with re/im swapped on
        // input and output, scaled by 1/N.
        auto swapped = map!swapRealImag(range);
        fft(swapped,  buf);

        immutable lenNeg1 = 1.0 / buf.length;
        foreach (ref elem; buf)
        {
            immutable temp = elem.re * lenNeg1;
            elem.re = elem.im * lenNeg1;
            elem.im = temp;
        }
    }
}
| |
// This mixin creates an Fft object in the scope it's mixed into such that all
// memory owned by the object is deterministically destroyed at the end of that
// scope. The lookup tables live in a malloc'd buffer freed on scope exit, and
// the Fft object itself is a scoped (stack-destroyed) class instance.
private enum string MakeLocalFft = q{
    import core.stdc.stdlib;
    import core.exception : onOutOfMemoryError;

    auto lookupBuf = (cast(lookup_t*) malloc(range.length * 2 * lookup_t.sizeof))
                     [0 .. 2 * range.length];
    if (!lookupBuf.ptr)
        onOutOfMemoryError();

    scope(exit) free(cast(void*) lookupBuf.ptr);
    auto fftObj = scoped!Fft(lookupBuf);
};
| |
/**Convenience functions that create an $(D Fft) object, run the FFT or inverse
 * FFT and return the result. Useful for one-off FFTs.
 *
 * Note: In addition to convenience, these functions are slightly more
 * efficient than manually creating an Fft object for a single use,
 * as the Fft object is deterministically destroyed before these
 * functions return.
 */
Complex!F[] fft(F = double, R)(R range)
{
    // MakeLocalFft declares fftObj with scope-bound (non-GC) lookup tables.
    mixin(MakeLocalFft);
    return fftObj.fft!(F, R)(range);
}
| |
/// ditto
void fft(Ret, R)(R range, Ret buf)
{
    // MakeLocalFft declares fftObj with scope-bound (non-GC) lookup tables.
    mixin(MakeLocalFft);
    return fftObj.fft!(Ret, R)(range, buf);
}
| |
/// ditto
Complex!F[] inverseFft(F = double, R)(R range)
{
    // MakeLocalFft declares fftObj with scope-bound (non-GC) lookup tables.
    mixin(MakeLocalFft);
    return fftObj.inverseFft!(F, R)(range);
}
| |
/// ditto
void inverseFft(Ret, R)(R range, Ret buf)
{
    // MakeLocalFft declares fftObj with scope-bound (non-GC) lookup tables.
    mixin(MakeLocalFft);
    return fftObj.inverseFft!(Ret, R)(range, buf);
}
| |
// Cross-checks fft/inverseFft against reference values from R and Octave,
// covers pure-real, retro (non-array), float, and complex inputs, plus the
// special-cased sizes 0, 1 and 2.
@system unittest
{
    import std.algorithm;
    import std.conv;
    import std.range;
    // Test values from R and Octave.
    auto arr = [1,2,3,4,5,6,7,8];
    auto fft1 = fft(arr);
    assert(approxEqual(map!"a.re"(fft1),
        [36.0, -4, -4, -4, -4, -4, -4, -4]));
    assert(approxEqual(map!"a.im"(fft1),
        [0, 9.6568, 4, 1.6568, 0, -1.6568, -4, -9.6568]));

    auto fft1Retro = fft(retro(arr));
    assert(approxEqual(map!"a.re"(fft1Retro),
        [36.0, 4, 4, 4, 4, 4, 4, 4]));
    assert(approxEqual(map!"a.im"(fft1Retro),
        [0, -9.6568, -4, -1.6568, 0, 1.6568, 4, 9.6568]));

    auto fft1Float = fft(to!(float[])(arr));
    assert(approxEqual(map!"a.re"(fft1), map!"a.re"(fft1Float)));
    assert(approxEqual(map!"a.im"(fft1), map!"a.im"(fft1Float)));

    alias C = Complex!float;
    auto arr2 = [C(1,2), C(3,4), C(5,6), C(7,8), C(9,10),
        C(11,12), C(13,14), C(15,16)];
    auto fft2 = fft(arr2);
    assert(approxEqual(map!"a.re"(fft2),
        [64.0, -27.3137, -16, -11.3137, -8, -4.6862, 0, 11.3137]));
    assert(approxEqual(map!"a.im"(fft2),
        [72, 11.3137, 0, -4.686, -8, -11.3137, -16, -27.3137]));

    auto inv1 = inverseFft(fft1);
    assert(approxEqual(map!"a.re"(inv1), arr));
    assert(reduce!max(map!"a.im"(inv1)) < 1e-10);

    auto inv2 = inverseFft(fft2);
    assert(approxEqual(map!"a.re"(inv2), map!"a.re"(arr2)));
    assert(approxEqual(map!"a.im"(inv2), map!"a.im"(arr2)));

    // FFTs of size 0, 1 and 2 are handled as special cases. Test them here.
    ushort[] empty;
    assert(fft(empty) == null);
    assert(inverseFft(fft(empty)) == null);

    real[] oneElem = [4.5L];
    auto oneFft = fft(oneElem);
    assert(oneFft.length == 1);
    assert(oneFft[0].re == 4.5L);
    assert(oneFft[0].im == 0);

    auto oneInv = inverseFft(oneFft);
    assert(oneInv.length == 1);
    assert(approxEqual(oneInv[0].re, 4.5));
    assert(approxEqual(oneInv[0].im, 0));

    long[2] twoElems = [8, 4];
    auto twoFft = fft(twoElems[]);
    assert(twoFft.length == 2);
    assert(approxEqual(twoFft[0].re, 12));
    assert(approxEqual(twoFft[0].im, 0));
    assert(approxEqual(twoFft[1].re, 4));
    assert(approxEqual(twoFft[1].im, 0));
    auto twoInv = inverseFft(twoFft);
    assert(approxEqual(twoInv[0].re, 8));
    assert(approxEqual(twoInv[0].im, 0));
    assert(approxEqual(twoInv[1].re, 4));
    assert(approxEqual(twoInv[1].im, 0));
}
| |
// Exchanges the real and imaginary components of a complex-like value.
// Handy for inverse FFTs, which can reuse the forward-FFT machinery by
// swapping re/im on the way in and out.
C swapRealImag(C)(C input)
{
    auto flipped = C(input.im, input.re);
    return flipped;
}
| |
| private: |
| // The reasons I couldn't use std.algorithm were b/c its stride length isn't |
| // modifiable on the fly and because range has grown some performance hacks |
| // for powers of 2. |
struct Stride(R)
{
    import core.bitop : bsf;
    Unqual!R range;    // underlying random-access range
    size_t _nSteps;    // current stride length
    size_t _length;    // elements remaining when viewed at this stride
    alias E = ElementType!(R);

    this(R range, size_t nStepsIn)
    {
        this.range = range;
        _nSteps = nStepsIn;
        // Ceiling division: number of elements visited at this stride.
        // (`nSteps` here resolves to the property getter, i.e. _nSteps.)
        _length = (range.length + _nSteps - 1) / nSteps;
    }

    // Number of elements this strided view yields.
    size_t length() const @property
    {
        return _length;
    }

    // Forward-range save: duplicates the underlying range as well.
    typeof(this) save() @property
    {
        auto ret = this;
        ret.range = ret.range.save;
        return ret;
    }

    // Random access: element i of the view is element i * _nSteps of range.
    E opIndex(size_t index)
    {
        return range[index * _nSteps];
    }

    E front() @property
    {
        return range[0];
    }

    void popFront()
    {
        if (range.length >= _nSteps)
        {
            range = range[_nSteps .. range.length];
            _length--;
        }
        else
        {
            // Fewer than _nSteps elements remain: the view is exhausted.
            range = range[0 .. 0];
            _length = 0;
        }
    }

    // Pops half the range's stride.
    void popHalf()
    {
        range = range[_nSteps / 2 .. range.length];
    }

    bool empty() const @property
    {
        return length == 0;
    }

    size_t nSteps() const @property
    {
        return _nSteps;
    }

    // Doubles the stride, halving the number of visible elements.
    // NOTE(review): does not update the underlying range's offset; callers
    // are expected to combine this with popHalf as needed.
    void doubleSteps()
    {
        _nSteps *= 2;
        _length /= 2;
    }

    size_t nSteps(size_t newVal) @property
    {
        _nSteps = newVal;

        // Using >> bsf(nSteps) is a few cycles faster than / nSteps.
        // NOTE(review): the shift equals the ceiling division only when
        // _nSteps is a power of two — presumably guaranteed by the FFT
        // code that uses this setter; confirm against callers.
        _length = (range.length + _nSteps - 1) >> bsf(nSteps);
        return newVal;
    }
}
| |
| // Hard-coded base case for FFT of size 2. This is actually a TON faster than |
| // using a generic slow DFT. This seems to be the best base case. (Size 1 |
| // can be coded inline as buf[0] = range[0]). |
// Hard-coded base case for FFT of size 2.  This is actually a TON faster than
// using a generic slow DFT.  This seems to be the best base case.  (Size 1
// can be coded inline as buf[0] = range[0]).
//
// A size-2 DFT is just the sum and difference of the two inputs.
void slowFourier2(Ret, R)(R range, Ret buf)
{
    assert(range.length == 2);
    assert(buf.length == 2);
    auto first = range[0];
    auto second = range[1];
    buf[0] = first + second;
    buf[1] = first - second;
}
| |
| // Hard-coded base case for FFT of size 4. Doesn't work as well as the size |
| // 2 case. |
// Hard-coded base case for FFT of size 4.  Doesn't work as well as the size
// 2 case.
//
// The four twiddle factors of a size-4 DFT are 1, -i, -1, i, so the
// transform reduces to sums/differences with at most one rotation by i.
void slowFourier4(Ret, R)(R range, Ret buf)
{
    alias C = ElementType!Ret;

    assert(range.length == 4);
    assert(buf.length == 4);

    // The imaginary unit; multiplying by it rotates a term by 90 degrees.
    auto imagUnit = C(0, 1);
    buf[0] = range[0] + range[1] + range[2] + range[3];
    buf[1] = range[0] - range[1] * imagUnit - range[2] + range[3] * imagUnit;
    buf[2] = range[0] - range[1] + range[2] - range[3];
    buf[3] = range[0] + range[1] * imagUnit - range[2] - range[3] * imagUnit;
}
| |
// Returns the largest power of 2 that is <= num, by clearing every bit of
// num except its most significant set bit.  Returns 0 for num == 0, since
// core.bitop.bsr's result is undefined for a zero argument.
N roundDownToPowerOf2(N)(N num)
if (isScalarType!N && !isFloatingPoint!N)
{
    import core.bitop : bsr;
    // bsr(0) is undefined behavior; handle zero explicitly.
    if (num == 0)
        return 0;
    return num & (cast(N) 1 << bsr(num));
}
| |
@safe unittest
{
    assert(roundDownToPowerOf2(7) == 4);
    assert(roundDownToPowerOf2(4) == 4);
    // Boundary cases: exact power of two, just above one, and the minimum.
    assert(roundDownToPowerOf2(8) == 8);
    assert(roundDownToPowerOf2(9) == 8);
    assert(roundDownToPowerOf2(1) == 1);
}
| |
// Evaluates to true iff T structurally looks like a complex number, i.e.
// expressions T.init.re and T.init.im both compile (e.g. std.complex.Complex).
template isComplexLike(T)
{
    enum bool isComplexLike = is(typeof(T.init.re)) &&
        is(typeof(T.init.im));
}
| |
@safe unittest
{
    static assert(isComplexLike!(Complex!double));
    static assert(isComplexLike!(Complex!float));
    // Types without .re/.im properties must be rejected.
    static assert(!isComplexLike!(uint));
    static assert(!isComplexLike!(string));
}