//|
//|
//| Copyright (c) 2001-2005
//| Andrew Fedoniouk - andrew@terrainformatica.com
//|
//| tokenizer / lexical scanner
//|
//|

#ifndef __tl_tokenizer_h
#define __tl_tokenizer_h

//|
//|
//| (semi)universal lexical scanner
//|
//|

#include "tl_hash_table.h"
#include "tl_string.h"
#include "tl_ustring.h"
#include "tl_value.h"

namespace tool {

// Abstract character source consumed by the scanners in this header.
// Concrete streams derive from it and implement get_char().
//
// CHAR_TYPE is the character unit the stream produces.
template <typename CHAR_TYPE> struct instream {
  typedef CHAR_TYPE char_type;

  // url_ - identifier of the text origin, stored in `url`.
  // line_no starts at 1.
  instream(const string &url_) : url(url_), line_no(1) {}

  // The type is used polymorphically (it has a pure virtual method), so the
  // destructor must be virtual: otherwise destroying a derived stream through
  // an instream pointer is undefined behavior.
  virtual ~instream() = default;

  // Returns the next character of the input. source_scanner in this header
  // treats a returned 0 as "no more input".
  virtual CHAR_TYPE get_char() = 0;

  string url;     // origin of the text, as passed to the constructor
  uint   line_no; // line counter, initialised to 1; maintained by users of
                  // the stream (nothing in this struct updates it)
};

// Tokenizer over a buffer of narrow chars. The scanning functions (token(),
// token_value(), push_back() and the protected helpers) are only declared
// here and are defined elsewhere; the inline members below are plain
// accessors for the configuration and position fields.
class tokenz {
public:
  // Conversion option handed to the constructors (the names suggest case
  // conversion of token text - confirm against the implementation).
  enum cvt_flag { cvt_no = 0, cvt_to_upper = 1, cvt_to_lower = 2 };

  // Token kinds. token() returns int; presumably one of these values -
  // TODO confirm against the implementation.
  enum token_types {
    TT_END_OF_TEXT = 0,
    TT_BREAK_CHAR,
    TT_WORD_VALUE,
    TT_STRING_VALUE
  };

protected:
  int      _p_state;    // current  state
  cvt_flag _p_flag;     // option   flag
  char     _p_curquote; // current  quote char

  string      _token;     // last token value
  const char *_text;      // input text
  const char *_text_end;  // input text end
  const char *_pos;       // current pos in input
  const char *_token_pos; // current token start
  const char *_prev_pos;

  // Character sets and escape character configured through the setters below.
  string _whites;
  string _sbreak;
  string _quotes;
  char   _eschar;

  // Values reported by break_used() / quote_used().
  char _break_used;
  char _quote_used;

  // Flags set by skip_comments().
  bool _skip_cpp_comments;
  bool _skip_c_comments;

public:
  // The input is given either as pointer + length or as a pointer pair.
  tokenz(const char *text, size_t text_length, cvt_flag flag = cvt_no);
  tokenz(const char *text, const char *text_end, cvt_flag flag = cvt_no);

  ~tokenz() {}

  // Set / get the whitespace character set (_whites).
  void   white_chars(const char *ps) { _whites = ps; }
  void   white_chars(const string &s) { _whites = s; }
  string white_chars() const { return _whites; }

  // Set / get the break character set (_sbreak).
  void   break_chars(const char *ps) { _sbreak = ps; }
  void   break_chars(const string &s) { _sbreak = s; }
  string break_chars() const { return _sbreak; }

  // Set / get the quote character set (_quotes).
  void   quote_chars(const char *ps) { _quotes = ps; }
  void   quote_chars(const string &s) { _quotes = s; }
  string quote_chars() const { return _quotes; }

  // Set the escape character (_eschar).
  void escape_char(char c) { _eschar = c; }

  // Note the parameter order: the first argument sets the C-comment flag,
  // the second sets the C++-comment flag.
  void skip_comments(bool c, bool cpp) {
    _skip_cpp_comments = cpp;
    _skip_c_comments   = c;
  }

  int         token();
  string      token_value();
  const char *token_pos() const { return _token_pos; }

  // Get / set the current read position. The setter stores `p` as is,
  // without checking it against the input bounds.
  const char *pos() const { return _pos; }
  void        pos(const char *p) { _pos = p; }

  void push_back();

  // True once the read position has reached or passed the end of the input.
  bool is_eot() const { return _pos >= _text_end; }

  char break_used() const { return _break_used; }
  char quote_used() const { return _quote_used; }

protected:
  int  sindex(char ch, const char *str);
  int  is_comment_start();
  void skip_comment(int comment_type);
};

// Parser of style definitions built on top of a tokenz instance.
class style_parser {
  tokenz zz; // underlying tokenizer, configured by the constructor

public:
  // Wraps `text` / `text_length` in a tokenizer set up with:
  //   whitespace  : space, tab, CR, LF
  //   break chars : ; : { }
  //   quote chars : none
  //   escape char : character code 1
  style_parser(const char *text, int text_length) : zz(text, text_length) {
    zz.white_chars(" \t\r\n");
    zz.break_chars(";:{}");
    zz.quote_chars("");
    zz.escape_char(1);
  }

  // Defined elsewhere. Takes `name` and `atts` by non-const reference,
  // presumably as outputs - TODO confirm against the implementation.
  bool parse_style_def(string &name, hash_table<string, string> &atts);
};

// Declared here, defined elsewhere: narrow and wide-string overloads taking
// the source text `txt` and a hash table `atts` by non-const reference
// (presumably filled with the parsed name/value pairs; the accepted syntax
// and the meaning of the bool result are not visible here - TODO confirm).
bool parse_named_values(const string &txt, hash_table<string, string> &atts);
bool parse_named_values(const ustring &txt, hash_table<string, ustring> &atts);

// String flavour of get_value(): hands back the entry stored under `name`,
// or the caller-supplied default `dv` when `atts` has no such key.
inline const wchar *get_value(const hash_table<string, ustring> &atts,
                              const char *name, const wchar *dv) {
  if (atts.exists(name))
    return atts[name];
  return dv;
}

inline int get_value(const hash_table<string, ustring> &atts, const char *name,
                     int dv) {
  ustring v = atts[name];
  if (v.is_empty())
    return dv;
  return atoi(string(v));
}

inline bool get_value(const hash_table<string, ustring> &atts, const char *name,
                      bool dv) {
  ustring v = atts[name];
  if (v.is_empty())
    return dv;
  return v() == WCHARS("true");
}

namespace xjson {
// Tokenizer over a range of wide characters [input, end). Only the small
// helpers are defined inline; the scan_* functions, get_token() and
// get_index_token() are defined elsewhere.
class scanner {
public:
  enum token_t {
    T_END = 0,
    // +,-,etc. are coming literally
    T_NUMBER = 256, // 12, 0xff, etc.
    T_CURRENCY,     // 123$44.
    T_DATETIME,     // 0d2008-00-12T12:00
    T_COLOR,        //#RRGGBB or #RRGGBBAA
    T_STRING,       // "..." or '...'
    T_NAME,         // e.g. abc12

    T_INDEX_NAME,   // .name or ["name"], used by get_index_token() only
    T_INDEX_NUMBER, // [0], used by get_index_token() only
    T_ERROR,        // error while getting token
  };

private /* data */:

  const wchar *input; // start of the text
  const wchar *end;   // one past the last character
  const wchar *pos;   // current read position

  int  line_no; // starts at 1, advanced by scan_ws() on each '\n' it skips
  bool canonic; // the `strict` constructor argument

  // wchars       token_value_src;
  array<wchar> token_value;
  token_t      saved_token; // token stored by push_back()

private /* methods */:

  token_t scan_number();
  token_t scan_color();
  token_t scan_string(wchar delimeter);
  token_t scan_nmtoken();
  void    skip_comment(bool toeol);
  wchar   scan_escape();

  // Advances pos over whitespace, counting newlines in line_no.
  // Returns true when input remains after the skipped whitespace.
  bool scan_ws() {
    while (pos < end) {
      if (*pos == '\n')
        ++line_no;
      // isspace() is only defined for arguments representable as unsigned
      // char (or EOF). Wide code units outside that range are rejected up
      // front instead of being passed to it.
      else if (static_cast<unsigned>(*pos) > 0xFFu || !isspace(*pos))
        break;
      ++pos;
    }
    return pos < end;
  }
  token_t scan_parenthesis();

public:
  // expr   - text to scan; the scanner keeps pointers into it, so the
  //          underlying buffer must outlive the scanner.
  // strict - stored in `canonic`.
  // Initializers are listed in member declaration order, which is the order
  // they actually run in.
  scanner(wchars expr, bool strict = false)
      : input(expr.start), end(expr.end()), pos(expr.start), line_no(1),
        canonic(strict), saved_token(T_END) {}
  ~scanner() {}
  token_t get_token();
  wchars  get_parsed() {
    return wchars(input, pos - input);
  } // get fragment parsed so far
  wchars get_nonparsed() {
    return wchars::range(pos, end);
  } // get non parsed fragment so far

  // Returns the current token text. The push(0)/pop() pair writes a zero
  // just past the last element without changing the size; presumably so the
  // returned slice is followed by a terminator - TODO confirm against the
  // array implementation.
  wchars get_value() {
    if (token_value.size() == 0 || token_value.last() != 0) {
      token_value.push(0);
      token_value.pop();
    }
    return token_value();
  }
  void push_back(token_t t) { saved_token = t; }

  // Skips whitespace (so it does move pos) and reports whether the input is
  // exhausted.
  bool at_end() { return !scan_ws(); }

  // Reports where the read position is within the text.
  //   lineno - in/out: incremented once for every '\n' in the consumed text
  //            [input, pos). The caller provides the starting value.
  //   charno - out: offset of pos from the start of its line (0-based).
  void get_parsed_location(int &lineno, int &charno) {
    const wchar *lstart     = input;
    bool         line_found = false;
    // Walk backwards over the consumed characters only. pos itself is not
    // examined: it has not been consumed yet and equals `end` (one past the
    // buffer) once all input has been read.
    for (const wchar *t = pos; t > input;) {
      --t;
      if (*t == '\n') {
        if (!line_found) {
          // The first newline met going backwards starts the current line.
          lstart     = t + 1;
          line_found = true;
        }
        ++lineno;
      }
    }
    charno = int(pos - lstart);
  }

  token_t get_index_token(wchars &value);
};

// Free-function interface of the xjson namespace; all functions below are
// defined elsewhere.

// Parses `text` into a value. `text` is taken by non-const reference
// (presumably advanced past the consumed input - TODO confirm). `on_error`
// is an optional callback receiving a wchars argument; it defaults to
// nullptr (no callback).
value parse(wchars &text, bool open_model,
            function<void(wchars)> on_error = nullptr);

// Free-function counterpart of scanner::get_index_token() operating on
// `text` directly; `value` is taken by non-const reference (presumably
// receives the token text - TODO confirm).
scanner::token_t get_index_token(wchars &text, wchars &value);

// Output flavour selector for emit().
enum MODE {
  JSON,  // classic JSON
  XJSON, // xJSON
};
// Serialises `v` to text in the requested mode.
ustring emit(const value &v, MODE m, bool open_model);
} // namespace xjson

// Tokenizer that pulls wide characters one at a time from an instream<wchar>.
// A single character of look-ahead is kept in `last_char`; because 0 marks
// "nothing pushed back", a 0 character itself cannot be pushed back.
// The scan_* functions, _get_token() and get_token() are defined elsewhere.
class source_scanner {
public:
  enum token_t {
    T_EOF = 0,
    // +,-,etc. are coming literally
    T_NUMBER = 256,  // 12, 0xff, etc.
    T_NUMBER_UNIT,   // 12em, etc.
    T_COLOR,         //#RRGGBB or #RRGGBBAA
    T_STRING,        // "..." or '...'
    T_NAME,          // e.g. abc12
    T_COMMENT,       // //... or /* */
    T_OPERATOR,      // ++, +, -, etc.
    T_OPAREN,        // (
    T_CPAREN,        // )
    T_ERROR,         // parsing error, not closed string, etc.
    T_END_OF_ISLAND, // </script>, </style>, etc.
  };

private /* data */:

  instream<wchar> *input; // character source; not deleted by this class

  array<wchar> token_value; // text of the current token
  wchar        last_char;   // pushed-back character, 0 when there is none
  bool         css_like;    // the `css` constructor argument

private /* methods */:

  // Next character: the pushed-back one if present (and it is consumed),
  // otherwise a fresh character from the stream.
  wchar get_char() {
    if (!last_char)
      return input->get_char();
    const wchar pending = last_char;
    last_char           = 0;
    return pending;
  }

  token_t scan_number(wchar fc);
  token_t scan_color(wchar fc);
  token_t scan_string(wchar delimeter);
  token_t scan_nmtoken(wchar fc);
  token_t scan_operator(wchar fc);
  void    skip_comment(bool toeol);
  wchar   scan_escape();

  // Consumes whitespace. The first non-space character is pushed back and
  // true is returned; false means a 0 character (end of input) was read.
  bool scan_ws() {
    wchar c;
    while ((c = get_char()) != 0) {
      if (is_space(c))
        continue;
      push_back(c);
      return true;
    }
    return false;
  }
  // token_t scan_parenthesis();
  token_t _get_token();

public:
  // pin - character source, must stay alive while the scanner is used.
  // css - stored in `css_like`.
  source_scanner(instream<wchar> *pin, bool css)
      : input(pin), last_char(0), css_like(css) {}
  ~source_scanner() {}
  token_t get_token();

  // Returns the current token text. The push(0)/pop() pair writes a zero
  // just past the last element without changing the size; presumably so the
  // returned slice is followed by a terminator - TODO confirm against the
  // array implementation.
  wchars get_value() {
    const bool zero_last =
        token_value.size() != 0 && token_value.last() == 0;
    if (!zero_last) {
      token_value.push(0);
      token_value.pop();
    }
    return token_value();
  }

  // Stores `c` as the look-ahead character, replacing any earlier one.
  void push_back(wchar c) { last_char = c; }
  bool was_push_back() const { return last_char != 0; }

  function<void(int)> token_start_cb; // if set will receive token start events
  function<void(int)> token_end_cb;   // if set will receive token end events
};

} // namespace tool

#endif //__tl_tokenizer_h
