| /** |
| * Check the arguments to `printf` and `scanf` against the `format` string. |
| * |
| * Copyright: Copyright (C) 1999-2022 by The D Language Foundation, All Rights Reserved |
| * Authors: $(LINK2 https://www.digitalmars.com, Walter Bright) |
| * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) |
| * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/chkformat.d, _chkformat.d) |
| * Documentation: https://dlang.org/phobos/dmd_chkformat.html |
| * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/chkformat.d |
| */ |
| module dmd.chkformat; |
| |
| //import core.stdc.stdio : printf, scanf; |
| import core.stdc.ctype : isdigit; |
| |
| import dmd.astenums; |
| import dmd.cond; |
| import dmd.errors; |
| import dmd.expression; |
| import dmd.globals; |
| import dmd.identifier; |
| import dmd.mtype; |
| import dmd.target; |
| |
| |
| /****************************************** |
| * Check that arguments to a printf format string are compatible |
| * with that string. Issue errors for incompatibilities. |
| * |
| * Follows the C99 specification for printf. |
| * |
| * Takes a generous, rather than strict, view of compatiblity. |
| * For example, an unsigned value can be formatted with a signed specifier. |
| * |
| * Diagnosed incompatibilities are: |
| * |
| * 1. incompatible sizes which will cause argument misalignment |
| * 2. deferencing arguments that are not pointers |
| * 3. insufficient number of arguments |
| * 4. struct arguments |
| * 5. array and slice arguments |
| * 6. non-pointer arguments to `s` specifier |
| * 7. non-standard formats |
| * 8. undefined behavior per C99 |
| * |
| * Per the C Standard, extra arguments are ignored. |
| * |
| * No attempt is made to fix the arguments or the format string. |
| * |
| * Params: |
| * loc = location for error messages |
| * format = format string |
| * args = arguments to match with format string |
| * isVa_list = if a "v" function (format check only) |
| * |
| * Returns: |
| * `true` if errors occurred |
| * References: |
| * C99 7.19.6.1 |
| * https://www.cplusplus.com/reference/cstdio/printf/ |
| */ |
| bool checkPrintfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list) |
| { |
| //printf("checkPrintFormat('%.*s')\n", cast(int)format.length, format.ptr); |
| size_t n, gnu_m_count; // index in args / number of Format.GNU_m |
| for (size_t i = 0; i < format.length;) |
| { |
| if (format[i] != '%') |
| { |
| ++i; |
| continue; |
| } |
| bool widthStar; |
| bool precisionStar; |
| size_t j = i; |
| const fmt = parsePrintfFormatSpecifier(format, j, widthStar, precisionStar); |
| const slice = format[i .. j]; |
| i = j; |
| |
| if (fmt == Format.percent) |
| continue; // "%%", no arguments |
| |
| if (isVa_list) |
| { |
| // format check only |
| if (fmt == Format.error) |
| deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr); |
| continue; |
| } |
| |
| if (fmt == Format.GNU_m) |
| ++gnu_m_count; |
| |
| Expression getNextArg(ref bool skip) |
| { |
| if (n == args.length) |
| { |
| if (args.length < (n + 1) - gnu_m_count) |
| deprecation(loc, "more format specifiers than %d arguments", cast(int)n); |
| else |
| skip = true; |
| return null; |
| } |
| return args[n++]; |
| } |
| |
| void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual) |
| { |
| deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`", |
| prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars()); |
| } |
| |
| if (widthStar) |
| { |
| bool skip; |
| auto e = getNextArg(skip); |
| if (skip) |
| continue; |
| if (!e) |
| return true; |
| auto t = e.type.toBasetype(); |
| if (t.ty != Tint32 && t.ty != Tuns32) |
| errorMsg("width ", e, "int", t); |
| } |
| |
| if (precisionStar) |
| { |
| bool skip; |
| auto e = getNextArg(skip); |
| if (skip) |
| continue; |
| if (!e) |
| return true; |
| auto t = e.type.toBasetype(); |
| if (t.ty != Tint32 && t.ty != Tuns32) |
| errorMsg("precision ", e, "int", t); |
| } |
| |
| bool skip; |
| auto e = getNextArg(skip); |
| if (skip) |
| continue; |
| if (!e) |
| return true; |
| auto t = e.type.toBasetype(); |
| auto tnext = t.nextOf(); |
| const c_longsize = target.c.longsize; |
| const ptrsize = target.ptrsize; |
| |
| // Types which are promoted to int are allowed. |
| // Spec: C99 6.5.2.2.7 |
| final switch (fmt) |
| { |
| case Format.u: // unsigned int |
| case Format.d: // int |
| if (t.ty != Tint32 && t.ty != Tuns32) |
| errorMsg(null, e, fmt == Format.u ? "uint" : "int", t); |
| break; |
| |
| case Format.hhu: // unsigned char |
| case Format.hhd: // signed char |
| if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint8 && t.ty != Tuns8) |
| errorMsg(null, e, fmt == Format.hhu ? "ubyte" : "byte", t); |
| break; |
| |
| case Format.hu: // unsigned short int |
| case Format.hd: // short int |
| if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint16 && t.ty != Tuns16) |
| errorMsg(null, e, fmt == Format.hu ? "ushort" : "short", t); |
| break; |
| |
| case Format.lu: // unsigned long int |
| case Format.ld: // long int |
| if (!(t.isintegral() && t.size() == c_longsize)) |
| { |
| if (fmt == Format.lu) |
| errorMsg(null, e, (c_longsize == 4 ? "uint" : "ulong"), t); |
| else |
| errorMsg(null, e, (c_longsize == 4 ? "int" : "long"), t); |
| } |
| break; |
| |
| case Format.llu: // unsigned long long int |
| case Format.lld: // long long int |
| if (t.ty != Tint64 && t.ty != Tuns64) |
| errorMsg(null, e, fmt == Format.llu ? "ulong" : "long", t); |
| break; |
| |
| case Format.ju: // uintmax_t |
| case Format.jd: // intmax_t |
| if (t.ty != Tint64 && t.ty != Tuns64) |
| { |
| if (fmt == Format.ju) |
| errorMsg(null, e, "core.stdc.stdint.uintmax_t", t); |
| else |
| errorMsg(null, e, "core.stdc.stdint.intmax_t", t); |
| } |
| break; |
| |
| case Format.zd: // size_t |
| if (!(t.isintegral() && t.size() == ptrsize)) |
| errorMsg(null, e, "size_t", t); |
| break; |
| |
| case Format.td: // ptrdiff_t |
| if (!(t.isintegral() && t.size() == ptrsize)) |
| errorMsg(null, e, "ptrdiff_t", t); |
| break; |
| |
| case Format.GNU_a: // Format.GNU_a is only for scanf |
| case Format.lg: |
| case Format.g: // double |
| if (t.ty != Tfloat64 && t.ty != Timaginary64) |
| errorMsg(null, e, "double", t); |
| break; |
| |
| case Format.Lg: // long double |
| if (t.ty != Tfloat80 && t.ty != Timaginary80) |
| errorMsg(null, e, "real", t); |
| break; |
| |
| case Format.p: // pointer |
| if (t.ty != Tpointer && t.ty != Tnull && t.ty != Tclass && t.ty != Tdelegate && t.ty != Taarray) |
| errorMsg(null, e, "void*", t); |
| break; |
| |
| case Format.n: // pointer to int |
| if (!(t.ty == Tpointer && tnext.ty == Tint32)) |
| errorMsg(null, e, "int*", t); |
| break; |
| |
| case Format.ln: // pointer to long int |
| if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize)) |
| errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t); |
| break; |
| |
| case Format.lln: // pointer to long long int |
| if (!(t.ty == Tpointer && tnext.ty == Tint64)) |
| errorMsg(null, e, "long*", t); |
| break; |
| |
| case Format.hn: // pointer to short |
| if (!(t.ty == Tpointer && tnext.ty == Tint16)) |
| errorMsg(null, e, "short*", t); |
| break; |
| |
| case Format.hhn: // pointer to signed char |
| if (!(t.ty == Tpointer && tnext.ty == Tint16)) |
| errorMsg(null, e, "byte*", t); |
| break; |
| |
| case Format.jn: // pointer to intmax_t |
| if (!(t.ty == Tpointer && tnext.ty == Tint64)) |
| errorMsg(null, e, "core.stdc.stdint.intmax_t*", t); |
| break; |
| |
| case Format.zn: // pointer to size_t |
| if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == ptrsize)) |
| errorMsg(null, e, "size_t*", t); |
| break; |
| |
| case Format.tn: // pointer to ptrdiff_t |
| if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == ptrsize)) |
| errorMsg(null, e, "ptrdiff_t*", t); |
| break; |
| |
| case Format.c: // char |
| if (t.ty != Tint32 && t.ty != Tuns32) |
| errorMsg(null, e, "char", t); |
| break; |
| |
| case Format.lc: // wint_t |
| if (t.ty != Tint32 && t.ty != Tuns32) |
| errorMsg(null, e, "wchar_t", t); |
| break; |
| |
| case Format.s: // pointer to char string |
| if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8))) |
| errorMsg(null, e, "char*", t); |
| break; |
| |
| case Format.ls: // pointer to wchar_t string |
| if (!(t.ty == Tpointer && tnext.ty.isSomeChar && tnext.size() == target.c.wchar_tsize)) |
| errorMsg(null, e, "wchar_t*", t); |
| break; |
| |
| case Format.error: |
| deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr); |
| break; |
| |
| case Format.GNU_m: |
| break; // not assert(0) because it may go through it if there are extra arguments |
| |
| case Format.percent: |
| assert(0); |
| } |
| } |
| return false; |
| } |
| |
| /****************************************** |
| * Check that arguments to a scanf format string are compatible |
| * with that string. Issue errors for incompatibilities. |
| * |
| * Follows the C99 specification for scanf. |
| * |
| * Takes a generous, rather than strict, view of compatiblity. |
| * For example, an unsigned value can be formatted with a signed specifier. |
| * |
| * Diagnosed incompatibilities are: |
| * |
| * 1. incompatible sizes which will cause argument misalignment |
| * 2. deferencing arguments that are not pointers |
| * 3. insufficient number of arguments |
| * 4. struct arguments |
| * 5. array and slice arguments |
| * 6. non-standard formats |
| * 7. undefined behavior per C99 |
| * |
| * Per the C Standard, extra arguments are ignored. |
| * |
| * No attempt is made to fix the arguments or the format string. |
| * |
| * Params: |
| * loc = location for error messages |
| * format = format string |
| * args = arguments to match with format string |
| * isVa_list = if a "v" function (format check only) |
| * |
| * Returns: |
| * `true` if errors occurred |
| * References: |
| * C99 7.19.6.2 |
| * https://www.cplusplus.com/reference/cstdio/scanf/ |
| */ |
| bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list) |
| { |
| size_t n = 0; |
| for (size_t i = 0; i < format.length;) |
| { |
| if (format[i] != '%') |
| { |
| ++i; |
| continue; |
| } |
| bool asterisk; |
| size_t j = i; |
| const fmt = parseScanfFormatSpecifier(format, j, asterisk); |
| const slice = format[i .. j]; |
| i = j; |
| |
| if (fmt == Format.percent || asterisk) |
| continue; // "%%", "%*": no arguments |
| |
| if (isVa_list) |
| { |
| // format check only |
| if (fmt == Format.error) |
| deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr); |
| continue; |
| } |
| |
| Expression getNextArg() |
| { |
| if (n == args.length) |
| { |
| if (!asterisk) |
| deprecation(loc, "more format specifiers than %d arguments", cast(int)n); |
| return null; |
| } |
| return args[n++]; |
| } |
| |
| void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual) |
| { |
| deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`", |
| prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars()); |
| } |
| |
| auto e = getNextArg(); |
| if (!e) |
| return true; |
| |
| auto t = e.type.toBasetype(); |
| auto tnext = t.nextOf(); |
| const c_longsize = target.c.longsize; |
| const ptrsize = target.ptrsize; |
| |
| final switch (fmt) |
| { |
| case Format.n: |
| case Format.d: // pointer to int |
| if (!(t.ty == Tpointer && tnext.ty == Tint32)) |
| errorMsg(null, e, "int*", t); |
| break; |
| |
| case Format.hhn: |
| case Format.hhd: // pointer to signed char |
| if (!(t.ty == Tpointer && tnext.ty == Tint16)) |
| errorMsg(null, e, "byte*", t); |
| break; |
| |
| case Format.hn: |
| case Format.hd: // pointer to short |
| if (!(t.ty == Tpointer && tnext.ty == Tint16)) |
| errorMsg(null, e, "short*", t); |
| break; |
| |
| case Format.ln: |
| case Format.ld: // pointer to long int |
| if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == c_longsize)) |
| errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t); |
| break; |
| |
| case Format.lln: |
| case Format.lld: // pointer to long long int |
| if (!(t.ty == Tpointer && tnext.ty == Tint64)) |
| errorMsg(null, e, "long*", t); |
| break; |
| |
| case Format.jn: |
| case Format.jd: // pointer to intmax_t |
| if (!(t.ty == Tpointer && tnext.ty == Tint64)) |
| errorMsg(null, e, "core.stdc.stdint.intmax_t*", t); |
| break; |
| |
| case Format.zn: |
| case Format.zd: // pointer to size_t |
| if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == ptrsize)) |
| errorMsg(null, e, "size_t*", t); |
| break; |
| |
| case Format.tn: |
| case Format.td: // pointer to ptrdiff_t |
| if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == ptrsize)) |
| errorMsg(null, e, "ptrdiff_t*", t); |
| break; |
| |
| case Format.u: // pointer to unsigned int |
| if (!(t.ty == Tpointer && tnext.ty == Tuns32)) |
| errorMsg(null, e, "uint*", t); |
| break; |
| |
| case Format.hhu: // pointer to unsigned char |
| if (!(t.ty == Tpointer && tnext.ty == Tuns8)) |
| errorMsg(null, e, "ubyte*", t); |
| break; |
| |
| case Format.hu: // pointer to unsigned short int |
| if (!(t.ty == Tpointer && tnext.ty == Tuns16)) |
| errorMsg(null, e, "ushort*", t); |
| break; |
| |
| case Format.lu: // pointer to unsigned long int |
| if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == c_longsize)) |
| errorMsg(null, e, (c_longsize == 4 ? "uint*" : "ulong*"), t); |
| break; |
| |
| case Format.llu: // pointer to unsigned long long int |
| if (!(t.ty == Tpointer && tnext.ty == Tuns64)) |
| errorMsg(null, e, "ulong*", t); |
| break; |
| |
| case Format.ju: // pointer to uintmax_t |
| if (!(t.ty == Tpointer && tnext.ty == Tuns64)) |
| errorMsg(null, e, "core.stdc.stdint.uintmax_t*", t); |
| break; |
| |
| case Format.g: // pointer to float |
| if (!(t.ty == Tpointer && tnext.ty == Tfloat32)) |
| errorMsg(null, e, "float*", t); |
| break; |
| |
| case Format.lg: // pointer to double |
| if (!(t.ty == Tpointer && tnext.ty == Tfloat64)) |
| errorMsg(null, e, "double*", t); |
| break; |
| |
| case Format.Lg: // pointer to long double |
| if (!(t.ty == Tpointer && tnext.ty == Tfloat80)) |
| errorMsg(null, e, "real*", t); |
| break; |
| |
| case Format.GNU_a: |
| case Format.GNU_m: |
| case Format.c: |
| case Format.s: // pointer to char string |
| if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8))) |
| errorMsg(null, e, "char*", t); |
| break; |
| |
| case Format.lc: |
| case Format.ls: // pointer to wchar_t string |
| if (!(t.ty == Tpointer && tnext.ty.isSomeChar && tnext.size() == target.c.wchar_tsize)) |
| errorMsg(null, e, "wchar_t*", t); |
| break; |
| |
| case Format.p: // double pointer |
| if (!(t.ty == Tpointer && tnext.ty == Tpointer)) |
| errorMsg(null, e, "void**", t); |
| break; |
| |
| case Format.error: |
| deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr); |
| break; |
| |
| case Format.percent: |
| assert(0); |
| } |
| } |
| return false; |
| } |
| |
| private: |
| |
| /************************************** |
| * Parse the *format specifier* which is of the form: |
| * |
| * `%[*][width][length]specifier` |
| * |
| * Params: |
| * format = format string |
| * idx = index of `%` of start of format specifier, |
| * which gets updated to index past the end of it, |
| * even if `Format.error` is returned |
| * asterisk = set if there is a `*` sub-specifier |
| * Returns: |
| * Format |
| */ |
| Format parseScanfFormatSpecifier(scope const char[] format, ref size_t idx, |
| out bool asterisk) nothrow pure @safe |
| { |
| auto i = idx; |
| assert(format[i] == '%'); |
| const length = format.length; |
| |
| Format error() |
| { |
| idx = i; |
| return Format.error; |
| } |
| |
| ++i; |
| if (i == length) |
| return error(); |
| |
| if (format[i] == '%') |
| { |
| idx = i + 1; |
| return Format.percent; |
| } |
| |
| // * sub-specifier |
| if (format[i] == '*') |
| { |
| ++i; |
| if (i == length) |
| return error(); |
| asterisk = true; |
| } |
| |
| // fieldWidth |
| while (isdigit(format[i])) |
| { |
| i++; |
| if (i == length) |
| return error(); |
| } |
| |
| /* Read the scanset |
| * A scanset can be anything, so we just check that it is paired |
| */ |
| if (format[i] == '[') |
| { |
| while (i < length) |
| { |
| if (format[i] == ']') |
| break; |
| ++i; |
| } |
| |
| // no `]` found |
| if (i == length) |
| return error(); |
| |
| ++i; |
| // no specifier after `]` |
| // it could be mixed with the one above, but then idx won't have the right index |
| if (i == length) |
| return error(); |
| } |
| |
| /* Read the specifier |
| */ |
| char genSpec; |
| Format specifier = parseGenericFormatSpecifier(format, i, genSpec); |
| if (specifier == Format.error) |
| return error(); |
| |
| idx = i; |
| return specifier; // success |
| } |
| |
| /************************************** |
| * Parse the *format specifier* which is of the form: |
| * |
| * `%[flags][field width][.precision][length modifier]specifier` |
| * |
| * Params: |
| * format = format string |
| * idx = index of `%` of start of format specifier, |
| * which gets updated to index past the end of it, |
| * even if `Format.error` is returned |
| * widthStar = set if * for width |
| * precisionStar = set if * for precision |
| * Returns: |
| * Format |
| */ |
| Format parsePrintfFormatSpecifier(scope const char[] format, ref size_t idx, |
| out bool widthStar, out bool precisionStar) nothrow pure @safe |
| { |
| auto i = idx; |
| assert(format[i] == '%'); |
| const length = format.length; |
| bool hash; |
| bool zero; |
| bool flags; |
| bool width; |
| bool precision; |
| |
| Format error() |
| { |
| idx = i; |
| return Format.error; |
| } |
| |
| ++i; |
| if (i == length) |
| return error(); |
| |
| if (format[i] == '%') |
| { |
| idx = i + 1; |
| return Format.percent; |
| } |
| |
| /* Read the `flags` |
| */ |
| while (1) |
| { |
| const c = format[i]; |
| if (c == '-' || |
| c == '+' || |
| c == ' ') |
| { |
| flags = true; |
| } |
| else if (c == '#') |
| { |
| hash = true; |
| } |
| else if (c == '0') |
| { |
| zero = true; |
| } |
| else |
| break; |
| ++i; |
| if (i == length) |
| return error(); |
| } |
| |
| /* Read the `field width` |
| */ |
| { |
| const c = format[i]; |
| if (c == '*') |
| { |
| width = true; |
| widthStar = true; |
| ++i; |
| if (i == length) |
| return error(); |
| } |
| else if ('1' <= c && c <= '9') |
| { |
| width = true; |
| ++i; |
| if (i == length) |
| return error(); |
| while ('0' <= format[i] && format[i] <= '9') |
| { |
| ++i; |
| if (i == length) |
| return error(); |
| } |
| } |
| } |
| |
| /* Read the `precision` |
| */ |
| if (format[i] == '.') |
| { |
| precision = true; |
| ++i; |
| if (i == length) |
| return error(); |
| const c = format[i]; |
| if (c == '*') |
| { |
| precisionStar = true; |
| ++i; |
| if (i == length) |
| return error(); |
| } |
| else if ('0' <= c && c <= '9') |
| { |
| ++i; |
| if (i == length) |
| return error(); |
| while ('0' <= format[i] && format[i] <= '9') |
| { |
| ++i; |
| if (i == length) |
| return error(); |
| } |
| } |
| } |
| |
| /* Read the specifier |
| */ |
| char genSpec; |
| Format specifier = parseGenericFormatSpecifier(format, i, genSpec); |
| if (specifier == Format.error) |
| return error(); |
| |
| switch (genSpec) |
| { |
| case 'c': |
| case 's': |
| if (hash || zero) |
| return error(); |
| break; |
| |
| case 'd': |
| case 'i': |
| if (hash) |
| return error(); |
| break; |
| |
| case 'n': |
| if (hash || zero || precision || width || flags) |
| return error(); |
| break; |
| |
| default: |
| break; |
| } |
| |
| idx = i; |
| return specifier; // success |
| } |
| |
| /* Different kinds of formatting specifications, variations we don't |
| care about are merged. (Like we don't care about the difference between |
| f, e, g, a, etc.) |
| |
| For `scanf`, every format is a pointer. |
| */ |
| enum Format |
| { |
| d, // int |
| hhd, // signed char |
| hd, // short int |
| ld, // long int |
| lld, // long long int |
| jd, // intmax_t |
| zd, // size_t |
| td, // ptrdiff_t |
| u, // unsigned int |
| hhu, // unsigned char |
| hu, // unsigned short int |
| lu, // unsigned long int |
| llu, // unsigned long long int |
| ju, // uintmax_t |
| g, // float (scanf) / double (printf) |
| lg, // double (scanf) |
| Lg, // long double (both) |
| s, // char string (both) |
| ls, // wchar_t string (both) |
| c, // char (printf) |
| lc, // wint_t (printf) |
| p, // pointer |
| n, // pointer to int |
| hhn, // pointer to signed char |
| hn, // pointer to short |
| ln, // pointer to long int |
| lln, // pointer to long long int |
| jn, // pointer to intmax_t |
| zn, // pointer to size_t |
| tn, // pointer to ptrdiff_t |
| GNU_a, // GNU ext. : address to a string with no maximum size (scanf) |
| GNU_m, // GNU ext. : string corresponding to the error code in errno (printf) / length modifier (scanf) |
| percent, // %% (i.e. no argument) |
| error, // invalid format specification |
| } |
| |
| /************************************** |
| * Parse the *length specifier* and the *specifier* of the following form: |
| * `[length]specifier` |
| * |
| * Params: |
| * format = format string |
| * idx = index of of start of format specifier, |
| * which gets updated to index past the end of it, |
| * even if `Format.error` is returned |
| * genSpecifier = Generic specifier. For instance, it will be set to `d` if the |
| * format is `hdd`. |
| * Returns: |
| * Format |
| */ |
| Format parseGenericFormatSpecifier(scope const char[] format, |
| ref size_t idx, out char genSpecifier, bool useGNUExts = |
| findCondition(global.versionids, Identifier.idPool("CRuntime_Glibc"))) nothrow pure @trusted |
| { |
| const length = format.length; |
| |
| /* Read the `length modifier` |
| */ |
| const lm = format[idx]; |
| bool lm1; // if jztL |
| bool lm2; // if `hh` or `ll` |
| if (lm == 'j' || |
| lm == 'z' || |
| lm == 't' || |
| lm == 'L') |
| { |
| ++idx; |
| if (idx == length) |
| return Format.error; |
| lm1 = true; |
| } |
| else if (lm == 'h' || lm == 'l') |
| { |
| ++idx; |
| if (idx == length) |
| return Format.error; |
| lm2 = lm == format[idx]; |
| if (lm2) |
| { |
| ++idx; |
| if (idx == length) |
| return Format.error; |
| } |
| } |
| |
| /* Read the `specifier` |
| */ |
| Format specifier; |
| const sc = format[idx]; |
| genSpecifier = sc; |
| switch (sc) |
| { |
| case 'd': |
| case 'i': |
| if (lm == 'L') |
| specifier = Format.error; |
| else |
| specifier = lm == 'h' && lm2 ? Format.hhd : |
| lm == 'h' ? Format.hd : |
| lm == 'l' && lm2 ? Format.lld : |
| lm == 'l' ? Format.ld : |
| lm == 'j' ? Format.jd : |
| lm == 'z' ? Format.zd : |
| lm == 't' ? Format.td : |
| Format.d; |
| break; |
| |
| case 'u': |
| case 'o': |
| case 'x': |
| case 'X': |
| if (lm == 'L') |
| specifier = Format.error; |
| else |
| specifier = lm == 'h' && lm2 ? Format.hhu : |
| lm == 'h' ? Format.hu : |
| lm == 'l' && lm2 ? Format.llu : |
| lm == 'l' ? Format.lu : |
| lm == 'j' ? Format.ju : |
| lm == 'z' ? Format.zd : |
| lm == 't' ? Format.td : |
| Format.u; |
| break; |
| |
| case 'a': |
| if (useGNUExts) |
| { |
| // https://www.gnu.org/software/libc/manual/html_node/Dynamic-String-Input.html |
| specifier = Format.GNU_a; |
| break; |
| } |
| goto case; |
| |
| case 'f': |
| case 'F': |
| case 'e': |
| case 'E': |
| case 'g': |
| case 'G': |
| case 'A': |
| if (lm == 'L') |
| specifier = Format.Lg; |
| else if (lm1 || lm2 || lm == 'h') |
| specifier = Format.error; |
| else |
| specifier = lm == 'l' ? Format.lg : Format.g; |
| break; |
| |
| case 'c': |
| if (lm1 || lm2 || lm == 'h') |
| specifier = Format.error; |
| else |
| specifier = lm == 'l' ? Format.lc : Format.c; |
| break; |
| |
| case 's': |
| if (lm1 || lm2 || lm == 'h') |
| specifier = Format.error; |
| else |
| specifier = lm == 'l' ? Format.ls : Format.s; |
| break; |
| |
| case 'p': |
| if (lm1 || lm2 || lm == 'h' || lm == 'l') |
| specifier = Format.error; |
| else |
| specifier = Format.p; |
| break; |
| |
| case 'n': |
| if (lm == 'L') |
| specifier = Format.error; |
| else |
| specifier = lm == 'l' && lm2 ? Format.lln : |
| lm == 'l' ? Format.ln : |
| lm == 'h' && lm2 ? Format.hhn : |
| lm == 'h' ? Format.hn : |
| lm == 'j' ? Format.jn : |
| lm == 'z' ? Format.zn : |
| lm == 't' ? Format.tn : |
| Format.n; |
| break; |
| |
| case 'm': |
| if (useGNUExts) |
| { |
| // https://www.gnu.org/software/libc/manual/html_node/Other-Output-Conversions.html |
| specifier = Format.GNU_m; |
| break; |
| } |
| goto default; |
| |
| default: |
| specifier = Format.error; |
| break; |
| } |
| |
| ++idx; |
| return specifier; // success |
| } |
| |
| unittest |
| { |
| /* parseGenericFormatSpecifier |
| */ |
| |
| char genSpecifier; |
| size_t idx; |
| |
| assert(parseGenericFormatSpecifier("hhd", idx, genSpecifier) == Format.hhd); |
| assert(genSpecifier == 'd'); |
| |
| idx = 0; |
| assert(parseGenericFormatSpecifier("hn", idx, genSpecifier) == Format.hn); |
| assert(genSpecifier == 'n'); |
| |
| idx = 0; |
| assert(parseGenericFormatSpecifier("ji", idx, genSpecifier) == Format.jd); |
| assert(genSpecifier == 'i'); |
| |
| idx = 0; |
| assert(parseGenericFormatSpecifier("lu", idx, genSpecifier) == Format.lu); |
| assert(genSpecifier == 'u'); |
| |
| idx = 0; |
| assert(parseGenericFormatSpecifier("k", idx, genSpecifier) == Format.error); |
| |
| /* parsePrintfFormatSpecifier |
| */ |
| |
| bool widthStar; |
| bool precisionStar; |
| |
| // one for each Format |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%d", idx, widthStar, precisionStar) == Format.d); |
| assert(idx == 2); |
| assert(!widthStar && !precisionStar); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%ld", idx, widthStar, precisionStar) == Format.ld); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%lld", idx, widthStar, precisionStar) == Format.lld); |
| assert(idx == 4); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%jd", idx, widthStar, precisionStar) == Format.jd); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%zd", idx, widthStar, precisionStar) == Format.zd); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%td", idx, widthStar, precisionStar) == Format.td); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%g", idx, widthStar, precisionStar) == Format.g); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%Lg", idx, widthStar, precisionStar) == Format.Lg); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%p", idx, widthStar, precisionStar) == Format.p); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%n", idx, widthStar, precisionStar) == Format.n); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%ln", idx, widthStar, precisionStar) == Format.ln); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%lln", idx, widthStar, precisionStar) == Format.lln); |
| assert(idx == 4); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%hn", idx, widthStar, precisionStar) == Format.hn); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%hhn", idx, widthStar, precisionStar) == Format.hhn); |
| assert(idx == 4); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%jn", idx, widthStar, precisionStar) == Format.jn); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%zn", idx, widthStar, precisionStar) == Format.zn); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%tn", idx, widthStar, precisionStar) == Format.tn); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%c", idx, widthStar, precisionStar) == Format.c); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%lc", idx, widthStar, precisionStar) == Format.lc); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%s", idx, widthStar, precisionStar) == Format.s); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%ls", idx, widthStar, precisionStar) == Format.ls); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%%", idx, widthStar, precisionStar) == Format.percent); |
| assert(idx == 2); |
| |
| // Synonyms |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%i", idx, widthStar, precisionStar) == Format.d); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%u", idx, widthStar, precisionStar) == Format.u); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%o", idx, widthStar, precisionStar) == Format.u); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%x", idx, widthStar, precisionStar) == Format.u); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%X", idx, widthStar, precisionStar) == Format.u); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%f", idx, widthStar, precisionStar) == Format.g); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%F", idx, widthStar, precisionStar) == Format.g); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%G", idx, widthStar, precisionStar) == Format.g); |
| assert(idx == 2); |
| |
| idx = 0; |
| Format g = parsePrintfFormatSpecifier("%a", idx, widthStar, precisionStar); |
| assert(g == Format.g || g == Format.GNU_a); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%A", idx, widthStar, precisionStar) == Format.g); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%lg", idx, widthStar, precisionStar) == Format.lg); |
| assert(idx == 3); |
| |
| // width, precision |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%*d", idx, widthStar, precisionStar) == Format.d); |
| assert(idx == 3); |
| assert(widthStar && !precisionStar); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%.*d", idx, widthStar, precisionStar) == Format.d); |
| assert(idx == 4); |
| assert(!widthStar && precisionStar); |
| |
| idx = 0; |
| assert(parsePrintfFormatSpecifier("%*.*d", idx, widthStar, precisionStar) == Format.d); |
| assert(idx == 5); |
| assert(widthStar && precisionStar); |
| |
| // Too short formats |
| { |
| foreach (s; ["%", "%-", "%+", "% ", "%#", "%0", "%*", "%1", "%19", "%.", "%.*", "%.1", "%.12", |
| "%j", "%z", "%t", "%l", "%h", "%ll", "%hh"]) |
| { |
| idx = 0; |
| assert(parsePrintfFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error); |
| assert(idx == s.length); |
| } |
| } |
| |
| // Undefined format combinations |
| { |
| foreach (s; ["%#d", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg", |
| "%#c", "%0c", "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc", |
| "%#s", "%0s", "%js", "%zs", "%ts", "%Ls", "%hs", "%hhs", "%lls", |
| "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp", |
| "%-n", "%+n", "% n", "%#n", "%0n", "%*n", "%1n", "%19n", "%.n", "%.*n", "%.1n", "%.12n", "%Ln", "%K"]) |
| { |
| idx = 0; |
| assert(parsePrintfFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error); |
| assert(idx == s.length); |
| } |
| } |
| |
| /* parseScanfFormatSpecifier |
| */ |
| |
| bool asterisk; |
| |
| // one for each Format |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%d", idx, asterisk) == Format.d); |
| assert(idx == 2); |
| assert(!asterisk); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%hhd", idx, asterisk) == Format.hhd); |
| assert(idx == 4); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%hd", idx, asterisk) == Format.hd); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%ld", idx, asterisk) == Format.ld); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%lld", idx, asterisk) == Format.lld); |
| assert(idx == 4); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%jd", idx, asterisk) == Format.jd); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%zd", idx, asterisk) == Format.zd); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%td", idx, asterisk,) == Format.td); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%u", idx, asterisk) == Format.u); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%hhu", idx, asterisk,) == Format.hhu); |
| assert(idx == 4); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%hu", idx, asterisk) == Format.hu); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%lu", idx, asterisk) == Format.lu); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%llu", idx, asterisk) == Format.llu); |
| assert(idx == 4); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%ju", idx, asterisk) == Format.ju); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%g", idx, asterisk) == Format.g); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%lg", idx, asterisk) == Format.lg); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%Lg", idx, asterisk) == Format.Lg); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%p", idx, asterisk) == Format.p); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%s", idx, asterisk) == Format.s); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%ls", idx, asterisk,) == Format.ls); |
| assert(idx == 3); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%%", idx, asterisk) == Format.percent); |
| assert(idx == 2); |
| |
| // Synonyms |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%i", idx, asterisk) == Format.d); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%n", idx, asterisk) == Format.n); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%o", idx, asterisk) == Format.u); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%x", idx, asterisk) == Format.u); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%f", idx, asterisk) == Format.g); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%e", idx, asterisk) == Format.g); |
| assert(idx == 2); |
| |
| idx = 0; |
| g = parseScanfFormatSpecifier("%a", idx, asterisk); |
| assert(g == Format.g || g == Format.GNU_a); |
| assert(idx == 2); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%c", idx, asterisk) == Format.c); |
| assert(idx == 2); |
| |
| // asterisk |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%*d", idx, asterisk) == Format.d); |
| assert(idx == 3); |
| assert(asterisk); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%9ld", idx, asterisk) == Format.ld); |
| assert(idx == 4); |
| assert(!asterisk); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%*25984hhd", idx, asterisk) == Format.hhd); |
| assert(idx == 10); |
| assert(asterisk); |
| |
| // scansets |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%[a-zA-Z]s", idx, asterisk) == Format.s); |
| assert(idx == 10); |
| assert(!asterisk); |
| |
| idx = 0; |
| assert(parseScanfFormatSpecifier("%*25[a-z]hhd", idx, asterisk) == Format.hhd); |
| assert(idx == 12); |
| assert(asterisk); |
| |
| // Too short formats |
| foreach (s; ["%", "% ", "%#", "%0", "%*", "%1", "%19", |
| "%j", "%z", "%t", "%l", "%h", "%ll", "%hh", "%K"]) |
| { |
| idx = 0; |
| assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error); |
| assert(idx == s.length); |
| } |
| |
| |
| // Undefined format combinations |
| foreach (s; ["%Ld", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg", |
| "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc", |
| "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp", |
| "%-", "%+", "%#", "%0", "%.", "%Ln"]) |
| { |
| idx = 0; |
| assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error); |
| assert(idx == s.length); |
| |
| } |
| |
| // Invalid scansets |
| foreach (s; ["%[]", "%[s", "%[0-9lld", "%[", "%[a-z]"]) |
| { |
| idx = 0; |
| assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error); |
| assert(idx == s.length); |
| } |
| |
| } |