
/* Compiler implementation of the D programming language
 * Copyright (C) 1999-2019 by The D Language Foundation, All Rights Reserved
 * written by Walter Bright
 * http://www.digitalmars.com
 * Distributed under the Boost Software License, Version 1.0.
 * http://www.boost.org/LICENSE_1_0.txt
 * https://github.com/D-Programming-Language/dmd/blob/master/src/macro.c
 */

/* Simple macro text processor.
 */

#include "root/dsystem.h"

#include "mars.h"
#include "errors.h"
#include "root/rmem.h"
#include "root/root.h"

#include "macro.h"

// Helpers that are only declared here; Macro::expand uses them while it
// scans the NAME part of a $(NAME ...) invocation.
// NOTE(review): their exact semantics live with their definitions, which
// are not visible from this file - confirm there before relying on them.
bool isIdStart(const utf8_t *p);    // tested on the first character after "$("
bool isIdTail(const utf8_t *p);     // tested on each following character of the name
int utfStride(const utf8_t *p);     // step used to advance through the name

/*****************************************************
 * Duplicate the len bytes at p into a buffer obtained from mem.xmalloc.
 * Params:
 *      p       bytes to copy; may be NULL when len is 0
 *      len     number of bytes to copy
 * Returns:
 *      the pointer returned by mem.xmalloc(len), holding a copy of
 *      p[0..len]. Callers in this file release it with mem.xfree.
 */
utf8_t *memdup(const utf8_t *p, size_t len)
{
    utf8_t *copy = (utf8_t *)mem.xmalloc(len);

    // Macro::expand calls this with (NULL, 0) when it recurses without an
    // argument. memcpy must not be given a null pointer even for a
    // zero-length copy, so skip the call when there is nothing to copy.
    if (len)
        memcpy(copy, p, len);
    return copy;
}

/*****************************************************
 * Construct a macro table entry that is not yet linked into any list.
 * The name and text pointers are stored as given; nothing is copied.
 */
Macro::Macro(const utf8_t *name, size_t namelen, const utf8_t *text, size_t textlen)
    : next(NULL),
      name(name),
      namelen(namelen),
      text(text),
      textlen(textlen),
      inuse(0)
{
}


/*****************************************************
 * Look up a macro by name, starting at this entry and following the
 * next links.
 * Params:
 *      name    name to look for (not required to be NUL terminated)
 *      namelen length of name in bytes
 * Returns:
 *      the first entry whose name has the same length and bytes,
 *      or NULL if the end of the list is reached without a match
 */
Macro *Macro::search(const utf8_t *name, size_t namelen)
{
    Macro *entry = this;
    while (entry)
    {
        // Compare lengths first so memcmp only runs on equally long names.
        const bool sameName = entry->namelen == namelen &&
                              memcmp(entry->name, name, namelen) == 0;
        if (sameName)
            return entry;
        entry = entry->next;
    }
    return NULL;
}

/*****************************************************
 * Define a macro, or redefine it if the name is already in the list.
 * Params:
 *      ptable  address of the list head; updated when a new entry is added
 *      name    macro name, namelen bytes long
 *      text    replacement text, textlen bytes long
 * Returns:
 *      the entry that now carries the definition
 * The name and text pointers are stored, not copied.
 */
Macro *Macro::define(Macro **ptable, const utf8_t *name, size_t namelen, const utf8_t *text, size_t textlen)
{
    // Redefinition: overwrite the replacement text of the existing entry.
    for (Macro *entry = *ptable; entry; entry = entry->next)
    {
        if (entry->namelen != namelen)
            continue;
        if (memcmp(entry->name, name, namelen) != 0)
            continue;

        entry->text = text;
        entry->textlen = textlen;
        return entry;
    }

    // First definition: push a new entry onto the front of the list.
    Macro *head = new Macro(name, namelen, text, textlen);
    head->next = *ptable;
    *ptable = head;
    return head;
}

/**********************************************************
 * Given buffer p[0..end], extract argument marg[0..marglen].
 * Params:
 *      n       0:      get entire argument
 *              1..9:   get nth argument
 *              -1:     get 2nd through end
 */

size_t extractArgN(const utf8_t *p, size_t end, const utf8_t **pmarg, size_t *pmarglen, int n)
{
    /* Scan forward for matching right parenthesis.
     * Nest parentheses.
     * Skip over "..." and '...' strings inside HTML tags.
     * Skip over <!-- ... --> comments.
     * Skip over previous macro insertions
     * Set marglen.
     */
    unsigned parens = 1;
    unsigned char instring = 0;
    unsigned incomment = 0;
    unsigned intag = 0;
    unsigned inexp = 0;
    int argn = 0;

    size_t v = 0;

  Largstart:
    // Skip first space, if any, to find the start of the macro argument
    if (n != 1 && v < end && isspace(p[v]))
        v++;
    *pmarg = p + v;

    for (; v < end; v++)
    {   utf8_t c = p[v];

        switch (c)
        {
            case ',':
                if (!inexp && !instring && !incomment && parens == 1)
                {
                    argn++;
                    if (argn == 1 && n == -1)
                    {   v++;
                        goto Largstart;
                    }
                    if (argn == n)
                        break;
                    if (argn + 1 == n)
                    {   v++;
                        goto Largstart;
                    }
                }
                continue;

            case '(':
                if (!inexp && !instring && !incomment)
                    parens++;
                continue;

            case ')':
                if (!inexp && !instring && !incomment && --parens == 0)
                {
                    break;
                }
                continue;

            case '"':
            case '\'':
                if (!inexp && !incomment && intag)
                {
                    if (c == instring)
                        instring = 0;
                    else if (!instring)
                        instring = c;
                }
                continue;

            case '<':
                if (!inexp && !instring && !incomment)
                {
                    if (v + 6 < end &&
                        p[v + 1] == '!' &&
                        p[v + 2] == '-' &&
                        p[v + 3] == '-')
                    {
                        incomment = 1;
                        v += 3;
                    }
                    else if (v + 2 < end &&
                        isalpha(p[v + 1]))
                        intag = 1;
                }
                continue;

            case '>':
                if (!inexp)
                    intag = 0;
                continue;

            case '-':
                if (!inexp &&
                    !instring &&
                    incomment &&
                    v + 2 < end &&
                    p[v + 1] == '-' &&
                    p[v + 2] == '>')
                {
                    incomment = 0;
                    v += 2;
                }
                continue;

            case 0xFF:
                if (v + 1 < end)
                {
                    if (p[v + 1] == '{')
                        inexp++;
                    else if (p[v + 1] == '}')
                        inexp--;
                }
                continue;

            default:
                continue;
        }
        break;
    }
    if (argn == 0 && n == -1)
        *pmarg = p + v;
    *pmarglen = p + v - *pmarg;
    //printf("extractArg%d('%.*s') = '%.*s'\n", n, end, p, *pmarglen, *pmarg);
    return v;
}


/*****************************************************
 * Expand macro in place in buf.
 * Only look at the text in buf from start to end.
 *
 * The text is processed in two passes:
 *  1. Each $0..$9 and $+ is replaced by the matching part of arg, and the
 *     inserted text is itself expanded. $$n is reduced to a literal $n.
 *  2. Each $(NAME args) is replaced by the text of macro NAME, looked up
 *     in the list starting at this, and that text is expanded recursively
 *     with args as its argument. $$(NAME ...) is reduced to a literal
 *     $(NAME ...). A name that is not found is expanded through
 *     DDOC_UNDEFINED_MACRO if that macro exists, with the name prepended
 *     to the arguments; otherwise the invocation is removed.
 * Text inserted for $0..$9 and for $(NAME ...) is wrapped in
 * \xFF{ ... \xFF} markers; text inserted for $+ is not.
 *
 * If more than nestLimit expansions are already active, an error is
 * reported and the call returns without touching buf or *pend.
 *
 * Params:
 *      buf     buffer holding the text; modified in place
 *      start   offset in buf of the first byte to examine
 *      pend    in: offset one past the last byte to examine
 *              out: that same position after all insertions and removals
 *      arg     argument text for $0..$9 and $+; copied on entry, so it may
 *              point into buf
 *      arglen  length of arg in bytes
 */

void Macro::expand(OutBuffer *buf, size_t start, size_t *pend,
        const utf8_t *arg, size_t arglen)
{
    // limit recursive expansion
    static int nest;
    static const int nestLimit = 1000;
    if (nest > nestLimit)
    {
        error(Loc(), "DDoc macro expansion limit exceeded; more than %d "
            "expansions.", nestLimit);
        return;
    }
    nest++;

    size_t end = *pend;
    assert(start <= end);
    assert(end <= buf->offset);

    /* First pass - replace $0..$9 and $+
     */
    // Work on a private copy of arg: the edits below can move or
    // reallocate buf->data. The copy is released at the end of the function.
    arg = memdup(arg, arglen);
    for (size_t u = start; u + 1 < end; )
    {
        utf8_t *p = (utf8_t *)buf->data;   // buf->data is not loop invariant

        /* Look for $0, but not $$0, and replace it with arg.
         */
        if (p[u] == '$' && (isdigit(p[u + 1]) || p[u + 1] == '+'))
        {
            if (u > start && p[u - 1] == '$')
            {   // Don't expand $$0, but replace it with $0
                buf->remove(u - 1, 1);
                end--;
                u += 1; // the text moved left by one, so u is now one past the digit or '+'
                continue;
            }

            utf8_t c = p[u + 1];
            int n = (c == '+') ? -1 : c - '0';

            const utf8_t *marg;
            size_t marglen;
            if (n == 0)
            {
                marg = arg;
                marglen = arglen;
            }
            else
                extractArgN(arg, arglen, &marg, &marglen, n);
            if (marglen == 0)
            {   // Just remove macro invocation
                //printf("Replacing '$%c' with '%.*s'\n", p[u + 1], marglen, marg);
                buf->remove(u, 2);
                end -= 2;
            }
            else if (c == '+')
            {
                // Replace '$+' with 'arg'
                //printf("Replacing '$%c' with '%.*s'\n", p[u + 1], marglen, marg);
                buf->remove(u, 2);
                buf->insert(u, marg, marglen);
                end += marglen - 2;

                // Scan replaced text for further expansion
                size_t mend = u + marglen;
                expand(buf, u, &mend, NULL, 0);
                end += mend - (u + marglen);    // account for growth/shrinkage in the nested call
                u = mend;
            }
            else
            {
                // Replace '$1' with '\xFF{arg\xFF}'
                //printf("Replacing '$%c' with '\xFF{%.*s\xFF}'\n", p[u + 1], marglen, marg);
                buf->data[u] = 0xFF;
                buf->data[u + 1] = '{';
                buf->insert(u + 2, marg, marglen);
                buf->insert(u + 2 + marglen, (const char *)"\xFF}", 2);
                end += -2 + 2 + marglen + 2;

                // Scan replaced text for further expansion
                size_t mend = u + 2 + marglen;
                expand(buf, u + 2, &mend, NULL, 0);
                end += mend - (u + 2 + marglen);    // account for growth/shrinkage in the nested call
                u = mend;
            }
            //printf("u = %d, end = %d\n", u, end);
            //printf("#%.*s#\n", end, &buf->data[0]);
            continue;
        }

        u++;
    }

    /* Second pass - replace other macros
     */
    for (size_t u = start; u + 4 < end; )
    {
        utf8_t *p = (utf8_t *)buf->data;   // buf->data is not loop invariant

        /* A valid start of macro expansion is $(c, where c is
         * an id start character, and not $$(c.
         */
        if (p[u] == '$' && p[u + 1] == '(' && isIdStart(p+u+2))
        {
            //printf("\tfound macro start '%c'\n", p[u + 2]);
            utf8_t *name = p + u + 2;
            size_t namelen = 0;

            const utf8_t *marg;
            size_t marglen;

            size_t v;
            /* Scan forward to find end of macro name and
             * beginning of macro argument (marg).
             */
            for (v = u + 2; v < end; v+=utfStride(p+v))
            {

                if (!isIdTail(p+v))
                {   // We've gone past the end of the macro name.
                    namelen = v - (u + 2);
                    break;
                }
            }
            // The loop steps by utfStride, so the last step may land beyond
            // end. Clamp v so that 'end - v' below cannot wrap around.
            if (v > end)
                v = end;

            v += extractArgN(p + v, end - v, &marg, &marglen, 0);
            assert(v <= end);

            if (v < end)
            {   // v is on the closing ')'
                if (u > start && p[u - 1] == '$')
                {   // Don't expand $$(NAME), but replace it with $(NAME)
                    buf->remove(u - 1, 1);
                    end--;
                    u = v;      // now u is one past the closing ')'
                    continue;
                }

                // Temporary "name,args" string built for DDOC_UNDEFINED_MACRO;
                // it is owned here and must be released once it is no longer needed.
                utf8_t *undefArg = NULL;

                Macro *m = search(name, namelen);

                if (!m)
                {
                    static const char undef[] = "DDOC_UNDEFINED_MACRO";
                    m = search((const utf8_t *)undef, strlen(undef));
                    if (m)
                    {
                        // Macro was not defined, so this is an expansion of
                        //   DDOC_UNDEFINED_MACRO. Prepend macro name to args.
                        // marg = name[ ] ~ "," ~ marg[ ];
                        if (marglen)
                        {
                            utf8_t *q = (utf8_t *)mem.xmalloc(namelen + 1 + marglen);
                            assert(q);
                            memcpy(q, name, namelen);
                            q[namelen] = ',';
                            memcpy(q + namelen + 1, marg, marglen);
                            marg = q;
                            marglen += namelen + 1;
                            undefArg = q;
                        }
                        else
                        {
                            marg = name;
                            marglen = namelen;
                        }
                    }
                }

                if (m)
                {
                    if (m->inuse && marglen == 0)
                    {   // Remove macro invocation
                        // NOTE(review): unlike the other removal paths this one
                        // falls through to u++ below, so the character that
                        // moves into position u is not examined - confirm
                        // whether that is intended.
                        buf->remove(u, v + 1 - u);
                        end -= v + 1 - u;
                    }
                    else if (m->inuse &&
                             ((arglen == marglen && memcmp(arg, marg, arglen) == 0) ||
                              (arglen + 4 == marglen &&
                               marg[0] == 0xFF &&
                               marg[1] == '{' &&
                               memcmp(arg, marg + 2, arglen) == 0 &&
                               marg[marglen - 2] == 0xFF &&
                               marg[marglen - 1] == '}'
                              )
                             )
                            )
                    {
                        /* Recursive expansion:
                         *   marg is same as arg (with blue paint added)
                         * Just leave in place.
                         */
                    }
                    else
                    {
                        //printf("\tmacro '%.*s'(%.*s) = '%.*s'\n", m->namelen, m->name, marglen, marg, m->textlen, m->text);
                        // Copy the argument before editing buf: marg may point
                        // into buf->data, which spread() can move.
                        marg = memdup(marg, marglen);
                        // The copy supersedes the temporary "name,args" string.
                        if (undefArg)
                            mem.xfree(undefArg);
                        // Insert replacement text
                        buf->spread(v + 1, 2 + m->textlen + 2);
                        buf->data[v + 1] = 0xFF;
                        buf->data[v + 2] = '{';
                        memcpy(buf->data + v + 3, m->text, m->textlen);
                        buf->data[v + 3 + m->textlen] = 0xFF;
                        buf->data[v + 3 + m->textlen + 1] = '}';

                        end += 2 + m->textlen + 2;

                        // Scan replaced text for further expansion
                        m->inuse++;
                        size_t mend = v + 1 + 2+m->textlen+2;
                        expand(buf, v + 1, &mend, marg, marglen);
                        end += mend - (v + 1 + 2+m->textlen+2);
                        m->inuse--;

                        // Drop the original $(NAME args) text that preceded the insertion.
                        buf->remove(u, v + 1 - u);
                        end -= v + 1 - u;
                        u += mend - (v + 1);
                        mem.xfree(const_cast<utf8_t *>(marg));
                        //printf("u = %d, end = %d\n", u, end);
                        //printf("#%.*s#\n", end - u, &buf->data[u]);
                        continue;
                    }

                    // Reached only from the two branches above that do not
                    // expand; marg is not used past this point.
                    if (undefArg)
                        mem.xfree(undefArg);
                }
                else
                {
                    // Replace $(NAME) with nothing
                    buf->remove(u, v + 1 - u);
                    end -= (v + 1 - u);
                    continue;
                }
            }
        }
        u++;
    }
    mem.xfree(const_cast<utf8_t *>(arg));
    *pend = end;
    nest--;
}
