#include "html.h"

namespace html {
  using namespace gool;
  using namespace tool;

  // Advances pos over whitespace (as classified by iswspace), incrementing
  // line_no for every '\n' passed.
  // Returns the first non-whitespace character, leaving pos on it, or 0 when
  // the end of input is reached.
  wchar css_istream::skip_spaces() {
    while (pos < end) {
      const wchar current = *pos;
      if (current == '\n') {
        ++line_no;
      } else if (!iswspace(current)) {
        return current;
      }
      ++pos;
    }
    return 0;
  }

  // nmstart   [_a-z]|{nonascii}|{escape}
  // nmchar    [_a-z0-9-]|{nonascii}|{escape}

  // Scans a name token starting at pos into _token_value.
  //
  // The first character must be a letter, '_', '-', an escape sequence, or
  // (only with NMTOKEN_PLUS_AT) '@'. Following characters may be
  // alphanumeric, '_', '-', an escape sequence, '@' (NMTOKEN_PLUS_AT) or
  // '.' (NMTOKEN_PLUS_DOT).
  //
  // Returns false, without clearing _token_value, when the first character
  // cannot start a name; otherwise returns true with pos on the first
  // character that is not part of the name.
  bool css_istream::scan_nmtoken(NMTOKEN_VARIATIONS ntv) {
    const bool at_allowed  = (ntv == NMTOKEN_PLUS_AT);
    const bool dot_allowed = (ntv == NMTOKEN_PLUS_DOT);

    wchar c = *pos;
    const bool starts_name =
        is_alpha(c) || c == '_' || c == '-' || (at_allowed && c == '@');
    // unescape() is tried last: it rewrites c and moves pos for an escape.
    if (!starts_name && !unescape(c)) return false;

    _token_value.clear();
    _token_value.push(c);

    for (++pos; pos < end; ++pos) {
      c = *pos;

      const bool variation_char =
          (at_allowed && c == '@') || (dot_allowed && c == '.');
      if (!variation_char) {
        const bool name_char = is_alnum(c) || c == '_' || c == '-';
        if (!name_char && !unescape(c)) break;
      }
      _token_value += c;
    }
    return true;
  }

  bool css_istream::scan_attr_name() {
    wchar ch = *pos;
    if (!is_alpha(ch) && (ch != '_') && (ch != '-') && !unescape(ch))
      return false;

    _token_value.clear();
    _token_value.push(ch);

    for (++pos; pos < end; ++pos) {
      ch = *pos;
      if (!is_alnum(ch) && (ch != '_') && (ch != '-') && (ch != '!') &&
          !unescape(ch))
        break;
      _token_value += ch;
    }

    return true;
  }

  bool css_istream::scan_name() {
    wchar ch;

    _token_value.clear();

    for (; pos < end; ++pos) {
      ch = *pos;
      if (!iswalnum(ch) && (ch != '_') && (ch != '-') && !unescape(ch)) break;
      _token_value += ch;
    }

    return true;
  }

  bool css_istream::scan_chars_only() {
    wchar ch;

    _token_value.clear();

    for (; pos < end; ++pos) {
      ch = *pos;
      if (!iswalpha(ch) && !unescape(ch)) break;
      _token_value += ch;
    }
    return true;
  }

  bool css_istream::scan_number() {
    wchar ch = *pos;
    assert(is_digit(ch));
    _token_value.clear();
    _token_value.push(ch);
    for (++pos; pos < end; ++pos) {
      ch = *pos;
      if (!is_digit(ch)) break;
      _token_value += ch;
    }
    return true;
  }

  bool css_istream::scan_color() {
    assert(*pos == '#');
    _token_value.clear();
    _token_value.push('#');
    for (++pos; pos < end; ++pos) {
      wchar ch = *pos;
      if (is_xdigit(ch)) {
        if (_token_value.size() <= 8)
          _token_value += ch;
        else
          return false;
      } else
        break;
    }
    return _token_value.size() >= 4; //#FFF ... //#FFFFFFFF
  }

  int css_istream::scan_number_unit() {
    wchar ch = *pos;
    assert(is_digit(ch) || ch == '.' || ch == '-' || ch == '+' || ch == '*');

    _token_value.clear();

    if (ch == '*') {
      _token_value += ch;
      ++pos;
      return T_NUMBER_UNIT;
    }

    _token_value += ch;
    int num_dots = ch == '.' ? 1 : 0;

    for (++pos; pos < end; ++pos) {
      ch = *pos;
      if (ch == '.') {
        if (num_dots == 1) return T_FLOAT;
        ++num_dots;
      } else if (ch == 'e' || ch == 'E') {
        wchar chn = *(pos + 1);
        if ((chn == '+' || chn == '-') && is_digit(*(pos + 2))) {
          _token_value += ch;
          _token_value += chn;
          ++pos;
          continue;
        }
        if (!is_digit(chn)) break;
      } else if (!is_digit(ch))
        break;

      _token_value += ch;
    }

    if (_token_value.size() == 1 && _token_value[0] == '.') return 0;

    // char units[10] = {0,0,0};
    wchars units = wchars::range(pos, end);

    // for(int i = 0; p < end && i < 10; ++i)
    //{
    //  is_alpha(*p
    //  units[i] = (char)to_lower(*p++);
    //}

    if (units.length == 0) {
      if (num_dots)
        return T_FLOAT;
      else
        return T_INTEGER;
    } 
    else if (units.starts_with(WCHARS("dip"))) { pos += 3; _token_value += WCHARS("dip"); }
    else if (units.starts_with(WCHARS("px"))) { pos += 2;  _token_value += WCHARS("px"); }
    else if (units.starts_with(WCHARS("ppx"))) { pos += 3;  _token_value += WCHARS("ppx"); }
    else if (units.starts_with(WCHARS("em"))) {  pos += 2;  _token_value += WCHARS("em");} 
    else if (units.starts_with(WCHARS("rem"))) { pos += 3;  _token_value += WCHARS("rem"); }
    else if (units.starts_with(WCHARS("ex"))) {  pos += 2;  _token_value += WCHARS("ex");} 
    else if (units.starts_with(WCHARS("ch"))) { pos += 2;  _token_value += WCHARS("ch"); }
    else if (units.starts_with(WCHARS("in"))) {  pos += 2;  _token_value += WCHARS("in");} 
    else if (units.starts_with(WCHARS("cm"))) {  pos += 2;  _token_value += WCHARS("cm");} 
    else if (units.starts_with(WCHARS("mm"))) {  pos += 2;  _token_value += WCHARS("mm");} 
    else if (units.starts_with(WCHARS("pt"))) {  pos += 2;  _token_value += WCHARS("pt");} 
    else if (units.starts_with(WCHARS("pc"))) {  pos += 2;  _token_value += WCHARS("pc");} 
    else if (units.starts_with(WCHARS("vw"))) {  pos += 2;  _token_value += WCHARS("vw");} 
    else if (units.starts_with(WCHARS("vh"))) {  pos += 2;  _token_value += WCHARS("vh");} 
    else if (units.starts_with(WCHARS("vmin"))) {  pos += 4; _token_value += WCHARS("vmin");} 
    else if (units.starts_with(WCHARS("vmax"))) {  pos += 4; _token_value += WCHARS("vmax");} 
    else if (units.starts_with(WCHARS("%%"))) {  pos += 2; _token_value += WCHARS("%%"); } 
    else if (units.starts_with(WCHARS("%"))) { pos += 1; _token_value += WCHARS("%"); }
    else if (units.starts_with(WCHARS("fx"))) { pos += 2; _token_value += WCHARS("fx"); } 
    else if (units.starts_with(WCHARS("pr"))) { pos += 2; _token_value += WCHARS("pr"); } 
    else if (units.starts_with(WCHARS("*"))) { pos += 1; _token_value += WCHARS("*"); } 
    else if (units.starts_with(WCHARS("#"))) { pos += 1; _token_value += WCHARS("#"); } // obsolete
    else if (units.starts_with(WCHARS("s"))) { pos += 1; _token_value += WCHARS("s");  return T_DURATION; } 
    else if (units.starts_with(WCHARS("ms"))) { pos += 2; _token_value += WCHARS("ms"); return T_DURATION; } 
    else if (units.starts_with(WCHARS("deg"))) { pos += 3; _token_value += WCHARS("deg"); return T_ANGLE; } 
    else if (units.starts_with(WCHARS("grad"))) { pos += 4; _token_value += WCHARS("grad"); return T_ANGLE; } 
    else if (units.starts_with(WCHARS("rad"))) { pos += 3; _token_value += WCHARS("rad"); return T_ANGLE; } 
    else if (units.starts_with(WCHARS("turn"))) { pos += 4;  _token_value += WCHARS("turn"); return T_ANGLE; } 
    else if (units.starts_with(WCHARS("dpi"))) { pos += 3;  _token_value += WCHARS("dpi"); return T_DPI; }
    // else
    //  return T_STRING; // unknown unit
    else if (num_dots)
      return T_FLOAT;
    else
      return T_INTEGER;
    return T_NUMBER_UNIT;
  }

  // Skips a comment starting at pos.
  // Precondition (asserted): *pos == '/'.
  //
  // Two forms are recognised:
  //   "//" ... end of line (or end of input)  -> returns true, pos after '\n'
  //   "/*" ... "*/"                           -> returns true, pos after "*/"
  // line_no is incremented for every '\n' passed.
  //
  // Returns false when pos does not start a comment (pos unchanged) or when
  // a block comment is not terminated (pos is then left near the end of
  // input, on the last character).
  bool css_istream::skip_comment() {
    assert(*pos == '/');

    // Need a second character to tell a comment from a plain '/'.
    if (end - pos < 2) return false;

    if (pos[1] == '/') {
      while (pos < end) {
        if (pos[0] == '\n') {
          ++line_no;
          ++pos;
          return true;
        }
        ++pos;
      }
      return true; // line comment running up to the end of input
    }

    // The shortest block comment is the four characters "/**/".
    if (pos[1] != '*' || end - pos < 4) return false;

    pos += 2;
    while (pos < (end - 1)) {
      if (pos[0] == '*' && pos[1] == '/') {
        pos += 2;
        return true;
      } else if (*pos == '\n')
        ++line_no;
      ++pos;
    }
    return false; // unterminated block comment
  }

  // ATTN: this is not a full implementation of CSS escape.
  // Decodes an escape sequence whose ESCAPE_CHAR is at pos.
  //
  // ch: in  - the character at pos (may be ESCAPE_CHAR);
  //     out - the decoded character when an escape was processed.
  //
  // Returns false, with ch and pos untouched, when ch is not ESCAPE_CHAR.
  // Otherwise pos is advanced and left on the LAST consumed character, so
  // callers step past the sequence with their usual ++pos.
  //   \n \r \t \\ \" \'   -> the corresponding character
  //   \h..h (hex digits)  -> code point built from up to six hex digits;
  //                          a single hex digit yields false (ch and pos
  //                          are already modified in that case)
  //   \ followed by 0     -> false
  //   \ anything else     -> that character itself
  bool css_istream::unescape(wchar &ch) {
    // ch may contain '\'
    if (ch != ESCAPE_CHAR) return false;

    auto collect_unicode = [&](wchar &ch) -> bool {
      ch = 0;
      for (int n = 0; n < 6; ++n) {
        wchar c = *pos;
        ch      = (ch << 4) | (is_digit(c) ? c - '0' : to_lower(c) - 'a' + 10);
        // Six digits is the maximum: stop here with pos on the sixth digit so
        // that a following hex digit is not skipped by the caller's ++pos.
        if (n == 5) break;
        if (!is_xdigit(*++pos)) {
          --pos;
          return !!n;
        }
      }
      return true;
    };

    switch (ch = *++pos) {
    case 0: return false;
    case 'n': ch = '\n'; break;
    case '\\': ch = '\\'; break;
    case 'r': ch = '\r'; break;
    case 't': ch = '\t'; break;
    case '"': ch = '"'; break;
    case '\'': ch = '\''; break;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
    case 'a':
    case 'b':
    case 'c':
    case 'd':
    case 'e':
    case 'f':
    case 'A':
    case 'B':
    case 'C':
    case 'D':
    case 'E':
    case 'F':
      return collect_unicode(ch);
      // any other character escapes to itself
    }
    return true;
  }

  bool css_istream::scan_string() {
    wchar quote = *pos++;
    assert(quote == '\'' || quote == '"');

    _token_value.clear();

    while (pos < end) {
      wchar t = *pos;
      if (t == quote) {
        ++pos;
        return true;
      }
      (void)unescape(t);
      _token_value += t;
      ++pos;
    }
    return false;
  }

  // Returns the next token in selector context.
  //
  // ws_ignore:  when true, leading whitespace is skipped instead of being
  //             reported as a ' ' token.
  // inside_nth: when true, names are scanned with scan_chars_only()
  //             (letters only) instead of scan_nmtoken().
  //
  // prev_pos is set to the start of the returned token. Comments, "<!--" and
  // "-->" are skipped and the following token is returned with the same
  // ws_ignore / inside_nth settings. Single punctuation characters are
  // returned as themselves; anything unrecognised consumes one character and
  // yields T_ERROR.
  int css_istream::s_token(bool ws_ignore, bool inside_nth) {
    if (ws_ignore) skip_spaces();
    prev_pos = pos;
    wchar ch = pos < end ? *pos : 0;
    switch (ch) {
    case 0: return T_EOF;

    case '\r':
    case '\n': skip_spaces(); return ' ';
    case ' ':
    case '\t':
    case '\f':
      ++pos;
      skip_spaces();
      return ' ';

    case '#':
      ++pos;
      if (scan_name()) return T_ID;
      break;

    case ':':
      // ':' introduces a pseudo-class, '::' a pseudo-element
      if (++pos >= end || *pos != ':') {
        if (scan_nmtoken()) return T_PSEUDO;
      } else {
        ++pos;
        if (scan_nmtoken()) return T_PSEUDO_EL;
      }
      break;

    case '.':
      ++pos;
      if (scan_name()) return T_CLASS;
      break;
    case '(':
    case ')':
    case '+':
    case '[':
    case ']':
    case '{':
    case '}':
    case '>':
    case ',': ++pos; return ch;
    case '=':
      if (++pos >= end || *pos != '=') return '=';
      ++pos;
      return T_MATCH_ICASE; // "=="
    case '~':
      ++pos;
      if (pos >= end || *pos != '=') return '~';
      ++pos;
      return T_MATCH_LIST; // "~="
    case '|':
      ++pos;
      if (pos >= end || *pos != '=') return '|';
      ++pos;
      return T_MATCH_FIRST; // "|="
    case '^':
      ++pos;
      if (pos >= end || *pos != '=') return '^';
      ++pos;
      return T_MATCH_PREFIX; // "^="
    case '$':
      ++pos;
      if (pos >= end || *pos != '=') return '$';
      ++pos;
      return T_MATCH_SUFFIX; // "$="
    case '*':
      ++pos;
      if (pos >= end || *pos != '=') return '*';
      ++pos;
      return T_MATCH_SUBSTR; // "*="
    case '%':
      ++pos;
      if (pos >= end || *pos != '=') return '%';
      ++pos;
      return T_MATCH_LIST_ICASE; // "%="

    case '@':
      ++pos;
      if (scan_nmtoken()) return T_ATRULE;
      return '@';

    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
      if (scan_number()) return T_INTEGER;
      break;

    case '/':
      // keep the caller's scanning mode for the token after the comment
      if (skip_comment()) return s_token(ws_ignore, inside_nth);
      ++pos;
      return '/';

    case '-': // --> or -
      ++pos;
      if (end - pos < 2) return '-';
      if (pos[0] != '-' || pos[1] != '>') return '-';
      pos += 2;
      return s_token(ws_ignore, inside_nth);

    case '<': // <!--
      // all four characters of "<!--" must be inside the input
      if (end - pos < 4 || pos[1] != '!' || pos[2] != '-' || pos[3] != '-') {
        ++pos;
        return '<';
      }
      pos += 4;
      return s_token(ws_ignore, inside_nth);

    case '\'':
    case '\"':
      if (scan_string()) return T_STRING;
      break;

    default:
      if (inside_nth) {
        if (scan_chars_only()) return T_NMTOKEN;
      } else {
        if (scan_nmtoken()) return T_NMTOKEN;
      }
      break;
    }
    // unrecognised input: consume one character and report an error
    ++pos;
    return T_ERROR;
  }

  // Consumes input up to and including the first character found in
  // `delimeters` and returns the text before that character.
  // line_no is incremented for every '\n' passed; because the newline test
  // comes first, a '\n' is never treated as a delimiter.
  // Returns an empty wchars() when no delimiter is found (pos is then at end).
  wchars css_istream::scan_until(const wchar *delimeters) {
    wchars stops  = chars_of(delimeters);
    wchars result = wchars::range(pos, end);

    for (; pos < end; ++pos) {
      if (*pos == '\n') {
        ++line_no;
        continue;
      }
      if (stops.index_of(*pos) >= 0) {
        result.length = pos - result.start;
        ++pos; // step over the delimiter itself
        return result;
      }
    }
    return wchars();
  }

  int css_istream::b_token() {
    wchar ch = skip_spaces();
    prev_pos = pos;
    switch (ch) {
    case 0: return T_EOF;
    case ':':
    case ',':
    case ';':
#if defined(SCSS)
    case '^':
#endif
    case '{':
    case '}': ++pos; return ch;
    case '(':
    case ')': ++pos; return ch;

    case '/':
      if (skip_comment()) return b_token();
      ++pos;
      return '/';

    case '@':
      ++pos;
      if (scan_attr_name()) return T_ATRULE;
      return '@';

    default:
      if (scan_attr_name()) {
        if (*pos == '(' && token_value() == WCHARS("var")) {
          ++pos;
          if (scan_attr_name() && *pos == ')') {
            ++pos;
            return T_VARIABLE_NAME;
          }
          return T_ERROR;
        }
        else if (token_value().starts_with(WCHARS("--"))) {
          _token_value.remove(0, 2);
          return T_VARIABLE_NAME;
        }
        return T_NMTOKEN;
      }
      break;
    }
    ++pos;
    return T_ERROR;
  }

  // Returns the next token after skipping leading whitespace and any number
  // of consecutive comments; prev_pos is set to the start of the token.
  //
  // Recognised here: strings (T_STRING), '#' colours (T_COLOR), '@' names
  // (T_ATRULE), numbers with optional units (whatever scan_number_unit()
  // reports), "!important" (T_IMPORTANT), '~' (returned as T_NMTOKEN),
  // names (T_NMTOKEN) and name( forms: url(...) -> T_URL, calc(...) ->
  // T_CALC, selector(...) -> T_SELECTOR, any other name( -> T_FUNCTION with
  // pos just past the '('. Punctuation ; { } / ) , is returned as itself.
  // Anything else consumes one character and yields T_ERROR.
  int css_istream::a_token() {
    wchar ch = skip_spaces();

    // Skip every leading comment, not just the first one. The look-ahead at
    // pos[1] is only done when that character is inside the input.
    while (ch == '/' && end - pos > 1 && (pos[1] == '/' || pos[1] == '*')) {
      const bool skipped = skip_comment();
      ch                 = skip_spaces();
      if (!skipped) break; // not skippable: tokenize from wherever pos is now
    }

    prev_pos = pos;
    switch (ch) {
    case 0: return T_EOF;

    case '\'':
    case '\"':
      if (scan_string()) return T_STRING;
      break;

    case '#':
      if (scan_color()) return T_COLOR;
      break;

    case '@':
      ++pos;
      if (scan_nmtoken()) return T_ATRULE;
      return '@';

    case ';':
    case '{':
    case '}': ++pos; return ch;

    case '/':
    case ')':
    case ',': ++pos; return ch;

    case '-':
      // "--name" is a name, not a negative number
      if (pos[1] == '-')
        goto NMTOKEN;
      // otherwise deliberately falls through to number scanning
    case '.':
    case '+':
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
    case '*':
      {
        int tt = scan_number_unit();
        if (!tt) break;
        return tt;
      }

    case '!':
      ++pos;
      if (scan_nmtoken() && token_value() == WCHARS("important"))
        return T_IMPORTANT;
      break;

    case '~':
      ++pos;
      _token_value = wchars(ch);
      return T_NMTOKEN;

    default:
NMTOKEN:
      if (!scan_nmtoken(NMTOKEN_PLUS_DOT)) break;

      // A name not followed by '(' is a plain token.
      if (pos >= end - 2 || pos[0] != '(') return T_NMTOKEN;

      ++pos;

      if (token_value() == WCHARS("url")) {
        // url("...") / url('...') / url(raw text up to ')')
        _token_value.clear();
        wchar non_ws = skip_spaces();
        if (non_ws == '"' || non_ws == '\'') {
          scan_string();
          non_ws = skip_spaces();
          if (non_ws != ')') return T_ERROR;
          ++pos;
        } else
          while (pos < end) {
            if (*pos == ')') {
              ++pos;
              break;
            }
            _token_value += *pos++;
          }
        return T_URL;
      }
      if (token_value() == WCHARS("calc")) {
        // collect the raw expression, honouring nested parentheses
        _token_value.clear();
        int level = 0;
        while (pos < end) {
          if (*pos == '(') {
            ++level;
          } else if (*pos == ')') {
            if (level-- == 0) {
              ++pos;
              break;
            }
          }
          _token_value += *pos++;
        }
        return T_CALC;
      }
      // rgb() / rgba() are not special-cased here: per the original note
      // their handling was moved to the function parser.
      if (token_value() == WCHARS("selector")) {
        // collect the raw text up to the first ')'
        _token_value.clear();
        while (pos < end) {
          if (*pos == ')') {
            ++pos;
            break;
          }
          _token_value += *pos++;
        }
        return T_SELECTOR;
      }
      return T_FUNCTION;
    }
    // unrecognised input: consume one character and report an error
    ++pos;
    return T_ERROR;
  }

} // namespace html
