| // CODYlib -*- mode:c++ -*- |
| // Copyright (C) 2020 Nathan Sidwell, nathan@acm.org |
| // License: Apache v2.0 |
| |
| // Cody |
| #include "internal.hh" |
| // C++ |
| #include <algorithm> |
| // C |
| #include <cstring> |
| // OS |
| #include <unistd.h> |
| #include <cerrno> |
| |
| // MessageBuffer code |
| |
| // Lines consist of words and end with a NEWLINE (0xa) char |
| // Whitespace characters are TAB (0x9) and SPACE (0x20) |
| // Words consist of non-whitespace chars separated by whitespace. |
| // Multiple lines in one transaction are indicated by ending non-final |
| // lines with a SEMICOLON (0x3b) word, immediately before the NEWLINE |
| // The writer emits such a continuation as SPACE ';' NEWLINE. |
| // Words matching regexp [-+_/%.a-zA-Z0-9]+ need no quoting. |
| // Quoting with '...' |
| // Anything outside of [-+_/%.a-zA-Z0-9] needs quoting |
| // Within quotes, anything below SPACE (0x20), DEL (0x7f), ' or \ needs escaping. |
| // Escapes are \\, \', \n, \t, \_, everything else as \<hex><hex>? |
| // Spaces separate words, UTF8 encoding for non-ascii chars |
| |
| namespace Cody { |
| namespace Detail { |
| |
| static const char CONTINUE = S2C(u8";"); |
| |
| void MessageBuffer::BeginLine () |
| { |
| if (!buffer.empty ()) |
| { |
| // Terminate the previous line with a continuation |
| buffer.reserve (buffer.size () + 3); |
| buffer.push_back (S2C(u8" ")); |
| buffer.push_back (CONTINUE); |
| buffer.push_back (S2C(u8"\n")); |
| } |
| lastBol = buffer.size (); |
| } |
| |
| // QUOTE means 'maybe quote', we search it for quote-needing chars |
| |
| void MessageBuffer::Append (char const *str, bool quote, size_t len) |
| { |
| if (len == ~size_t (0)) |
| len = strlen (str); |
| |
| if (!len && !quote) |
| return; |
| |
| // We want to quote characters outside of [-+_A-Za-z0-9/%.], anything |
| // that could remotely be shell-active. UTF8 encoding for non-ascii. |
| if (quote && len) |
| { |
| quote = false; |
| // Scan looking for quote-needing characters. We could just |
| // append until we find one, but that's probably confusing |
| for (size_t ix = len; ix--;) |
| { |
| unsigned char c = (unsigned char)str[ix]; |
| if (!((c >= S2C(u8"a") && c <= S2C(u8"z")) |
| || (c >= S2C(u8"A") && c <= S2C(u8"Z")) |
| || (c >= S2C(u8"0") && c <= S2C(u8"9")) |
| || c == S2C(u8"-") || c == S2C(u8"+") || c == S2C(u8"_") |
| || c == S2C(u8"/") || c == S2C(u8"%") || c == S2C(u8"."))) |
| { |
| quote = true; |
| break; |
| } |
| } |
| } |
| |
| // Maximal length of appended string |
| buffer.reserve (buffer.size () + len * (quote ? 3 : 1) + 2); |
| |
| if (quote) |
| buffer.push_back (S2C(u8"'")); |
| |
| for (auto *end = str + len; str != end;) |
| { |
| auto *e = end; |
| |
| if (quote) |
| // Look for next escape-needing char. More relaxed than |
| // the earlier needs-quoting check. |
| for (e = str; e != end; ++e) |
| { |
| unsigned char c = (unsigned char)*e; |
| if (c < S2C(u8" ") || c == 0x7f |
| || c == S2C(u8"\\") || c == S2C(u8"'")) |
| break; |
| } |
| buffer.insert (buffer.end (), str, e); |
| str = e; |
| |
| if (str == end) |
| break; |
| |
| buffer.push_back (S2C(u8"\\")); |
| switch (unsigned char c = (unsigned char)*str++) |
| { |
| case S2C(u8"\t"): |
| c = S2C(u8"t"); |
| goto append; |
| |
| case S2C(u8"\n"): |
| c = S2C(u8"n"); |
| goto append; |
| |
| case S2C(u8"'"): |
| case S2C(u8"\\"): |
| append: |
| buffer.push_back (c); |
| break; |
| |
| default: |
| // Full-on escape. Use 2 lower-case hex chars |
| for (unsigned shift = 8; shift;) |
| { |
| shift -= 4; |
| |
| char nibble = (c >> shift) & 0xf; |
| nibble += S2C(u8"0"); |
| if (nibble > S2C(u8"9")) |
| nibble += S2C(u8"a") - (S2C(u8"9") + 1); |
| buffer.push_back (nibble); |
| } |
| } |
| } |
| |
| if (quote) |
| buffer.push_back (S2C(u8"'")); |
| } |
| |
| void MessageBuffer::Append (char c) |
| { |
| buffer.push_back (c); |
| } |
| |
| void MessageBuffer::AppendInteger (unsigned u) |
| { |
| // Sigh, even though std::to_string is C++11, we support building on |
| // gcc 4.8, which is a C++11 compiler lacking std::to_string. so |
| // have something horrible. |
| std::string v (20, 0); |
| size_t len = snprintf (const_cast<char *> (v.data ()), v.size (), "%u", u); |
| v.erase (len); |
| |
| AppendWord (v); |
| } |
| |
| int MessageBuffer::Write (int fd) noexcept |
| { |
| size_t limit = buffer.size () - lastBol; |
| ssize_t count = write (fd, &buffer.data ()[lastBol], limit); |
| |
| int err = 0; |
| if (count < 0) |
| err = errno; |
| else |
| { |
| lastBol += count; |
| if (size_t (count) != limit) |
| err = EAGAIN; |
| } |
| |
| if (err != EAGAIN && err != EINTR) |
| { |
| // Reset for next message |
| buffer.clear (); |
| lastBol = 0; |
| } |
| |
| return err; |
| } |
| |
| int MessageBuffer::Read (int fd) noexcept |
| { |
| constexpr size_t blockSize = 200; |
| |
| size_t lwm = buffer.size (); |
| size_t hwm = buffer.capacity (); |
| if (hwm - lwm < blockSize / 2) |
| hwm += blockSize; |
| buffer.resize (hwm); |
| |
| auto iter = buffer.begin () + lwm; |
| ssize_t count = read (fd, &*iter, hwm - lwm); |
| buffer.resize (lwm + (count >= 0 ? count : 0)); |
| |
| if (count < 0) |
| return errno; |
| |
| if (!count) |
| // End of file |
| return -1; |
| |
| bool more = true; |
| for (;;) |
| { |
| auto newline = std::find (iter, buffer.end (), S2C(u8"\n")); |
| if (newline == buffer.end ()) |
| break; |
| more = newline != buffer.begin () && newline[-1] == CONTINUE; |
| iter = newline + 1; |
| |
| if (iter == buffer.end ()) |
| break; |
| |
| if (!more) |
| { |
| // There is no continuation, but there are chars after the |
| // newline. Truncate the buffer and return an error |
| buffer.resize (iter - buffer.begin ()); |
| return EINVAL; |
| } |
| } |
| |
| return more ? EAGAIN : 0; |
| } |
| |
| int MessageBuffer::Lex (std::vector<std::string> &result) |
| { |
| result.clear (); |
| |
| if (IsAtEnd ()) |
| return ENOENT; |
| |
| Assert (buffer.back () == S2C(u8"\n")); |
| |
| auto iter = buffer.begin () + lastBol; |
| |
| for (std::string *word = nullptr;;) |
| { |
| char c = *iter; |
| |
| ++iter; |
| if (c == S2C(u8" ") || c == S2C(u8"\t")) |
| { |
| word = nullptr; |
| continue; |
| } |
| |
| if (c == S2C(u8"\n")) |
| break; |
| |
| if (c == CONTINUE) |
| { |
| // Line continuation |
| if (word || *iter != S2C(u8"\n")) |
| goto malformed; |
| ++iter; |
| break; |
| } |
| |
| if (c <= S2C(u8" ") || c >= 0x7f) |
| goto malformed; |
| |
| if (!word) |
| { |
| result.emplace_back (); |
| word = &result.back (); |
| } |
| |
| if (c == S2C(u8"'")) |
| { |
| // Quoted word |
| for (;;) |
| { |
| c = *iter; |
| |
| if (c == S2C(u8"\n")) |
| { |
| malformed:; |
| result.clear (); |
| iter = std::find (iter, buffer.end (), S2C(u8"\n")); |
| auto back = iter; |
| if (back[-1] == CONTINUE && back[-2] == S2C(u8" ")) |
| // Smells like a line continuation |
| back -= 2; |
| result.emplace_back (&buffer[lastBol], |
| back - buffer.begin () - lastBol); |
| ++iter; |
| lastBol = iter - buffer.begin (); |
| return EINVAL; |
| } |
| |
| if (c < S2C(u8" ") || c >= 0x7f) |
| goto malformed; |
| |
| ++iter; |
| if (c == S2C(u8"'")) |
| break; |
| |
| if (c == S2C(u8"\\")) |
| // escape |
| switch (c = *iter) |
| { |
| case S2C(u8"\\"): |
| case S2C(u8"'"): |
| ++iter; |
| break; |
| |
| case S2C(u8"n"): |
| c = S2C(u8"\n"); |
| ++iter; |
| break; |
| |
| case S2C(u8"_"): |
| // We used to escape SPACE as \_, so accept that |
| c = S2C(u8" "); |
| ++iter; |
| break; |
| |
| case S2C(u8"t"): |
| c = S2C(u8"\t"); |
| ++iter; |
| break; |
| |
| default: |
| { |
| unsigned v = 0; |
| for (unsigned nibble = 0; nibble != 2; nibble++) |
| { |
| c = *iter; |
| if (c < S2C(u8"0")) |
| { |
| if (!nibble) |
| goto malformed; |
| break; |
| } |
| else if (c <= S2C(u8"9")) |
| c -= S2C(u8"0"); |
| else if (c < S2C(u8"a")) |
| { |
| if (!nibble) |
| goto malformed; |
| break; |
| } |
| else if (c <= S2C(u8"f")) |
| c -= S2C(u8"a") - 10; |
| else |
| { |
| if (!nibble) |
| goto malformed; |
| break; |
| } |
| ++iter; |
| v = (v << 4) | c; |
| } |
| c = v; |
| } |
| } |
| word->push_back (c); |
| } |
| } |
| else |
| // Unquoted character |
| word->push_back (c); |
| } |
| lastBol = iter - buffer.begin (); |
| if (result.empty ()) |
| return ENOENT; |
| |
| return 0; |
| } |
| |
| void MessageBuffer::LexedLine (std::string &str) |
| { |
| if (lastBol) |
| { |
| size_t pos = lastBol - 1; |
| for (; pos; pos--) |
| if (buffer[pos-1] == S2C(u8"\n")) |
| break; |
| |
| size_t end = lastBol - 1; |
| if (buffer[end-1] == CONTINUE && buffer[end-2] == S2C(u8" ")) |
| // Strip line continuation |
| end -= 2; |
| str.append (&buffer[pos], end - pos); |
| } |
| } |
| } // Detail |
| } // Cody |