
/* RegExp.c - 'RegExp' handler */
/*
        Copyright (c) 2001-2004 Terra Informatica Software, Inc.
        and Andrew Fedoniouk andrew@terrainformatica.com
        All rights reserved
*/

#include "cs.h"

#include "tool/tl_wregexp.h"

namespace tis {

  /* 'RegExp' */

  /* method handlers (defined further down in this file) */
  static value CSF_ctor(VM *c);
  static value CSF_test(VM *c);
  static value CSF_exec(VM *c);
  static value CSF_positions(VM *c);

  /* RegExp methods: script-visible name paired with its native handler.
     "this" is bound to the constructor handler CSF_ctor.
     NOTE(review): the trailing (0, 0) entry presumably terminates the table - confirm against C_METHOD_ENTRY. */
  static c_method methods[] = {
      C_METHOD_ENTRY("this", CSF_ctor),
      // C_METHOD_ENTRY( "toLocaleString",  CSF_std_toLocaleString  ),
      C_METHOD_ENTRY("test", CSF_test), C_METHOD_ENTRY("exec", CSF_exec),
      C_METHOD_ENTRY("positions", CSF_positions), C_METHOD_ENTRY(0, 0)};

  /* RegExp property accessors (defined further down in this file) */
  static value CSF_length(VM *c, value obj);
  static value CSF_input(VM *c, value obj);
  static value CSF_source(VM *c, value obj);
  static value CSF_index(VM *c, value obj);
  static value CSF_lastIndex(VM *c, value obj);
  static void  CSF_set_lastIndex(VM *c, value obj, value val);

  /* RegExp properties: name, getter, setter.
     Only "lastIndex" is given a setter; every other entry passes 0 in the setter slot.
     NOTE(review): the trailing (0, 0, 0) entry presumably terminates the table - confirm against VP_METHOD_ENTRY. */
  static vp_method properties[] = {
      VP_METHOD_ENTRY("length", CSF_length, 0),
      VP_METHOD_ENTRY("input", CSF_input, 0),
      VP_METHOD_ENTRY("source", CSF_source, 0),
      VP_METHOD_ENTRY("index", CSF_index, 0),
      VP_METHOD_ENTRY("lastIndex", CSF_lastIndex, CSF_set_lastIndex),
      VP_METHOD_ENTRY(0, 0, 0)};

  /* RegExpValue - fetch the native tool::wregexp stored in a RegExp object.
     Yields 0 when obj is not of the RegExp type. */
  inline tool::wregexp *RegExpValue(VM *c, value obj) {
    if (!CsRegExpP(c, obj)) return 0;
    return (tool::wregexp *)CsCObjectValue(obj);
  }

  /* prototypes */
  static void DestroyRegExp(VM *c, value obj);

  /* RegExpGetItem - indexed read access on a RegExp object (re[n]).
     c   - the VM
     obj - the object being indexed
     tag - the index; a non-integer tag is reported through CsTypeError
     Returns the text of match number 'tag' as a string value, or
     UNDEFINED_VALUE when obj is not a RegExp, carries no native regexp,
     or the index lies outside [0, number of matches). */
  value RegExpGetItem(VM *c, value obj, value tag) {
    if (!CsRegExpP(c, obj)) return UNDEFINED_VALUE;
    if (!CsIntegerP(tag)) CsTypeError(c, tag);

    tool::wregexp *pre = RegExpValue(c, obj);
    if (!pre) return UNDEFINED_VALUE;

    int_t idx = CsIntegerValue(tag);
    // A negative index would satisfy "idx < count" on its own, so the lower
    // bound has to be checked explicitly before asking for the match.
    if (idx >= 0 && idx < pre->get_number_of_matches())
      return string_to_value(c, pre->get_match(idx));

    return UNDEFINED_VALUE;
  }

  /* RegExpSetItem - indexed write access on a RegExp object (re[n] = v).
     Always reports CsErrReadOnlyProperty for the given tag; obj and the
     assigned value are not looked at. */
  void RegExpSetItem(VM *c, value obj, value tag, value value) {
    CsThrowKnownError(c, CsErrReadOnlyProperty, tag);
  }

  /* RegExpNextElement - iteration step over the matches held by a RegExp object.
     c          - the VM
     index      - in/out cursor: NOTHING_VALUE on the first call, afterwards the
                  integer index of the element produced by the previous call
     collection - the RegExp object being iterated
     nr         - not used by this handler
     Produces (index, match text) through CS_RETURN2 while matches remain and
     returns NOTHING_VALUE once they are exhausted, or UNDEFINED_VALUE when
     collection carries no native regexp. */
  value RegExpNextElement(VM *c, value *index, value collection, int nr) {
    tool::wregexp *pre = RegExpValue(c, collection);
    if (!pre) return UNDEFINED_VALUE;

    if (*index == NOTHING_VALUE) // first
    {
      if (pre->get_number_of_matches()) {
        *index = CsMakeInteger(0);
        CS_RETURN2(c, *index, string_to_value(c, pre->get_match(0)));
      }
    } else if (CsIntegerP(*index)) {
      int_t i = CsIntegerValue(*index) + 1;
      *index  = CsMakeInteger(i);
      // The collection is a RegExp object, not a tuple: the upper bound is the
      // number of matches, the same quantity the first-element branch consults.
      if (i < pre->get_number_of_matches()) {
        CS_RETURN2(c, *index, string_to_value(c, pre->get_match(i)));
      }
    } else
      assert(false);

    return NOTHING_VALUE;
  }

  /* CsInitRegExp - register the 'RegExp' type in the global scope of the VM
     and hook up its destroy / item access / iteration handlers. */
  void CsInitRegExp(VM *c) {
    c->regexpDispatch = CsEnterCPtrObjectType(CsGlobalScope(c), "RegExp", methods, properties);

    // registration failed: report it and leave the handlers untouched
    if (!c->regexpDispatch) {
      CsInsufficientMemory(c);
      return;
    }

    // type-specific handlers
    c->regexpDispatch->destroy        = DestroyRegExp;
    c->regexpDispatch->getItem        = RegExpGetItem;
    c->regexpDispatch->setItem        = RegExpSetItem;
    c->regexpDispatch->getNextElement = RegExpNextElement;
  }

  /* CsRegExpP - true when obj is of the RegExp type registered for this VM
     (i.e. its type matches c->regexpDispatch). */
  bool CsRegExpP(VM *c, value obj) { return CsIsType(obj, c->regexpDispatch); }

  /* CsMakeRegExp - wrap a native regexp into a new 'RegExp' object.
     The destroy handler registered for the type (DestroyRegExp) deletes the
     wrapped pointer, so 're' must have been allocated with new. */
  value CsMakeRegExp(VM *c, tool::wregexp *re) {
    return CsMakeCPtrObject(c, c->regexpDispatch, re);
  }

  /* SetRegExpValue - store the native regexp pointer pw (may be 0) in obj.
     Any pointer previously stored in obj is simply overwritten, not deleted. */
  inline void SetRegExpValue(value obj, tool::wregexp *pw) {
    CsSetCObjectValue(obj, pw);
  }

  /* CSF_ctor - constructor handler of 'RegExp' (bound to "this"):
     RegExp(source [, flags]).
     Compiles 'source'; the optional flags value is converted to a string and
     scanned for the letters 'i', 'g' and 'm', which are handed to
     wregexp::compile() in that order. A compilation failure is reported as
     CsErrRegexpError carrying the compiler's error text. */
  static value CSF_ctor(VM *c) {
    value  self;
    value  flags = 0;
    wchars pattern;
    CsParseArguments(c, "V=*S#|V", &self, c->regexpDispatch, &pattern.start, &pattern.length, &flags);

    tool::ustring opts;
    if (flags) opts = value_to_string(flags);
    tool::ustring source = pattern;

    // one switch per recognised flag letter
    const bool has_i = opts().index_of('i') >= 0;
    const bool has_g = opts().index_of('g') >= 0;
    const bool has_m = opts().index_of('m') >= 0;

    tool::wregexp *re = new tool::wregexp();
    if (!re->compile(source, has_i, has_g, has_m)) {
      // copy the message out before the regexp that owns it goes away
      tool::string message = re->get_error_string();
      delete re;
      CsThrowKnownError(c, CsErrRegexpError, message.c_str());
    }

    SetRegExpValue(self, re);
    CsCtorRes(c) = self;
    return self;
  }

  /* DestroyRegExp - destroy handler of the 'RegExp' type: releases the native
     regexp attached to obj and clears the stored pointer. */
  static void DestroyRegExp(VM *c, value obj) {
    // RegExpValue() may yield 0; deleting a null pointer is harmless
    delete RegExpValue(c, obj);
    SetRegExpValue(obj, 0);
  }

  /* CSF_length - getter of 'length': number of matches held by the regexp.
     0 when no native regexp is attached, UNDEFINED_VALUE for non-RegExp objects. */
  static value CSF_length(VM *c, value obj) {
    if (!CsRegExpP(c, obj)) return UNDEFINED_VALUE;

    tool::wregexp *re = RegExpValue(c, obj);
    if (re) return CsMakeInteger(re->get_number_of_matches());
    return CsMakeInteger(0);
  }

  /* CSF_index - getter of 'index': value of wregexp::get_match_start().
     0 when no native regexp is attached, UNDEFINED_VALUE for non-RegExp objects. */
  static value CSF_index(VM *c, value obj) {
    if (!CsRegExpP(c, obj)) return UNDEFINED_VALUE;

    tool::wregexp *re = RegExpValue(c, obj);
    if (re) return CsMakeInteger(re->get_match_start());
    return CsMakeInteger(0);
  }
  /* CSF_lastIndex - getter of 'lastIndex': value of wregexp::get_match_end().
     0 when no native regexp is attached, UNDEFINED_VALUE for non-RegExp objects. */
  static value CSF_lastIndex(VM *c, value obj) {
    if (!CsRegExpP(c, obj)) return UNDEFINED_VALUE;

    tool::wregexp *re = RegExpValue(c, obj);
    if (re) return CsMakeInteger(re->get_match_end());
    return CsMakeInteger(0);
  }

  /* CSF_set_lastIndex - setter of 'lastIndex': stores the position kept in
     wregexp::m_next_index.
     c   - the VM
     obj - the RegExp object
     val - new position; must be an integer, otherwise
           CsErrUnexpectedTypeError is reported
     The position is clamped to [0, length of the last tested input].
     Reports CsErrRegexpError when obj is not a usable RegExp object. */
  static void CSF_set_lastIndex(VM *c, value obj, value val) {
    if (CsRegExpP(c, obj)) {
      tool::wregexp *pre = RegExpValue(c, obj);
      if (pre) {
        if (!CsIntegerP(val))
          CsThrowKnownError(c, CsErrUnexpectedTypeError, val, "integer");
        const int_t requested = CsIntegerValue(val);
        const uint  limit     = uint(pre->m_test.length());
        // Converting a negative number straight to uint wraps around to a
        // huge value that would then be clamped to the END of the input;
        // treat negative positions as the start instead.
        const uint idx    = requested > 0 ? uint(requested) : uint(0);
        pre->m_next_index = idx < limit ? idx : limit;
        return;
      }
    }
    CsThrowKnownError(c, CsErrRegexpError, "wrong RE object");
  }


  /* CSF_input - getter of 'input': the string kept in wregexp::m_test.
     UNDEFINED_VALUE for non-RegExp objects or when no native regexp is attached. */
  static value CSF_input(VM *c, value obj) {
    if (!CsRegExpP(c, obj)) return UNDEFINED_VALUE;

    tool::wregexp *re = RegExpValue(c, obj);
    if (re) return string_to_value(c, re->m_test);
    return UNDEFINED_VALUE;
  }

  /* CSF_source - getter of 'source': the string kept in wregexp::m_pattern.
     UNDEFINED_VALUE for non-RegExp objects or when no native regexp is attached. */
  static value CSF_source(VM *c, value obj) {
    if (!CsRegExpP(c, obj)) return UNDEFINED_VALUE;

    tool::wregexp *re = RegExpValue(c, obj);
    if (re) return string_to_value(c, re->m_pattern);
    return UNDEFINED_VALUE;
  }

  /* CSF_test - built-in method 'test(string)': runs the regexp against the
     given string and answers TRUE_VALUE / FALSE_VALUE.
     UNDEFINED_VALUE when the object carries no native regexp. */
  static value CSF_test(VM *c) {
    value  self;
    wchars text;
    CsParseArguments(c, "V=*S#", &self, c->regexpDispatch, &text.start, &text.length);

    tool::wregexp *re = RegExpValue(c, self);
    if (!re) return UNDEFINED_VALUE;

    if (re->exec(text)) return TRUE_VALUE;
    return FALSE_VALUE;
  }

  /* CSF_exec - built-in method 'exec(value)': converts the argument to a
     string and runs the regexp against it.
     Returns the RegExp object itself on a match and NULL_VALUE otherwise.
     A receiver that is a plain string is compiled as a one-off expression
     (no flags); on a match a fresh RegExp object wrapping it is returned. */
  static value CSF_exec(VM *c) {
    value self;
    value subject;
    CsParseArguments(c, "V=*V", &self, c->regexpDispatch, &subject);

    subject            = CsToString(c, subject);
    tool::ustring text = value_to_string(subject);

    if (CsRegExpP(c, self)) {
      tool::wregexp *re = RegExpValue(c, self);
      if (!re) CsThrowKnownError(c, CsErrRegexpError, "wrong RE object");
      if (!re->exec(text)) return NULL_VALUE;
      return self;
    }

    if (CsStringP(self)) {
      // temporary expression; released to the new object only on success
      tool::auto_ptr<tool::wregexp> tmp(new tool::wregexp);
      if (!tmp->compile(value_to_string(self), false, false, false)) {
        CsThrowKnownError(c, CsErrRegexpError, "bad expression");
      }
      if (!tmp->exec(text)) return NULL_VALUE;
      return CsMakeRegExp(c, tmp.release());
    }

    CsTypeError(c, self);
    return UNDEFINED_VALUE;
  }

  /* CSF_positions - built-in method 'positions(n)': start and end position of
     match number n.
     Produces the pair (get_match_start(n), get_match_end(n)) through
     CS_RETURN2, or UNDEFINED_VALUE when the object carries no native regexp
     or n lies outside [0, number of matches). */
  value CSF_positions(VM *c) {
    value obj;
    int_t idx = 0;
    CsParseArguments(c, "V=*i", &obj, c->regexpDispatch, &idx);
    tool::wregexp *pre = RegExpValue(c, obj);
    if (!pre) return UNDEFINED_VALUE;

    // A negative index passes "idx < count" on its own, so check the lower
    // bound explicitly before querying the match table.
    if (idx >= 0 && idx < pre->get_number_of_matches()) {
      CS_RETURN2(c, CsMakeInteger(pre->get_match_start(idx)),
                 CsMakeInteger(pre->get_match_end(idx)));
    }

    return UNDEFINED_VALUE;
  }

  /* CSF_string_match - string method 'match(pattern)'.
     RegExp pattern: runs exec_all() when the expression has m_global set,
     exec() otherwise, and returns a vector with the text of every match, or
     NULL_VALUE when nothing matched.
     String pattern: compiled as a one-off expression (no flags); on a match a
     fresh RegExp object is returned, otherwise NULL_VALUE.
     Any other pattern type is reported through CsTypeError. */
  value CSF_string_match(VM *c) {
    value subject;
    value pat;
    CsParseArguments(c, "V*V", &subject, &pat);

    subject            = CsToString(c, subject);
    tool::ustring text = value_to_string(subject);

    if (CsRegExpP(c, pat)) {
      tool::wregexp *re = RegExpValue(c, pat);
      if (!re) CsThrowKnownError(c, CsErrRegexpError, "wrong RE object");

      bool matched;
      if (re->m_global)
        matched = re->exec_all(text);
      else
        matched = re->exec(text);
      if (!matched) return NULL_VALUE;

      value vec = CsMakeVector(c, re->get_number_of_matches());
      PROTECT(vec);
      for (int n = 0; n < re->get_number_of_matches(); ++n) {
        // create the element first, only then touch the protected vector
        value item = string_to_value(c, re->get_match(n));
        CsSetVectorElement(c, vec, n, item);
      }
      return vec;
    }

    if (CsStringP(pat)) {
      tool::auto_ptr<tool::wregexp> tmp(new tool::wregexp);
      if (!tmp->compile(value_to_string(pat), false, false, false)) {
        CsThrowKnownError(c, CsErrRegexpError, "bad expression");
      }
      if (!tmp->exec(text)) return NULL_VALUE;
      return CsMakeRegExp(c, tmp.release());
    }

    CsTypeError(c, pat);
    return UNDEFINED_VALUE;
  }

  /* CsIsLike - implementation of the 'like' operator.
     what - left side, must be a string
     pat  - right side: a string (checked with tool::is_like) or a RegExp
            object (checked with wregexp::exec)
     Wrong operand types are reported as CsErrUnexpectedTypeError. */
  bool CsIsLike(VM *c, value what, value pat) {
    if (!CsStringP(what))
      CsThrowKnownError(c, CsErrUnexpectedTypeError, what, "left side of 'like' must be string");

    tool::wchars subject(CsStringAddress(what), CsStringSize(what));

    if (CsStringP(pat)) return tool::is_like(subject, CsStringAddress(pat));

    if (!CsRegExpP(c, pat)) {
      CsThrowKnownError(c, CsErrUnexpectedTypeError, pat, "right side of 'like' must be string or regexp");
      return false;
    }

    tool::wregexp *re = RegExpValue(c, pat);
    if (!re) CsThrowKnownError(c, CsErrRegexpError, "wrong RE object");
    return re->exec(subject) ? true : false;
  }

  /* CSF_string_search - string method 'search(pattern)'.
     Returns get_match_start() of the match as an integer, or -1 when the
     pattern does not match. For a RegExp pattern m_next_index is reset to 0
     before the search; a string pattern is compiled as a one-off expression
     (no flags). Any other pattern type is reported through CsTypeError. */
  value CSF_string_search(VM *c) {
    value subject;
    value pat;
    CsParseArguments(c, "V*V", &subject, &pat);

    subject            = CsToString(c, subject);
    tool::ustring text = value_to_string(subject);

    if (CsRegExpP(c, pat)) {
      tool::wregexp *re = RegExpValue(c, pat);
      if (!re) CsThrowKnownError(c, CsErrRegexpError, "wrong RE object");
      re->m_next_index = 0;
      if (!re->exec(text)) return CsMakeInteger(-1);
      return CsMakeInteger(re->get_match_start());
    }

    if (CsStringP(pat)) {
      tool::auto_ptr<tool::wregexp> tmp(new tool::wregexp);
      if (!tmp->compile(value_to_string(pat), false, false, false)) {
        CsThrowKnownError(c, CsErrRegexpError, "bad expression");
      }
      if (!tmp->exec(text)) return CsMakeInteger(-1);
      return CsMakeInteger(tmp->get_match_start());
    }

    CsTypeError(c, pat);
    return UNDEFINED_VALUE;
  }

  /* replace_fun - argument pack used when the replacement given to
     string.replace() is a script function: the matches currently held by the
     regexp are exposed as the call arguments (count()/nth()), and call()
     invokes the function and converts its result to a string. */
  struct replace_fun : public vargs {
    pvalue         fun; // the script function to invoke
    tool::wregexp *pre; // regexp whose current matches become the arguments

    replace_fun(VM *vm, value callee, tool::wregexp *regexp)
        : fun(vm, callee), pre(regexp) {}

    // invoke the function in the current scope with *this as argument list
    tool::ustring call() {
      value result = CsCallFunction(CsCurrentScope(fun.pvm), fun.val, *this);
      return value_to_string(result);
    }

    // one argument per match
    virtual int count() { return pre->get_number_of_matches(); }

    // n-th argument: text of the n-th match as a new string value
    virtual value nth(int n) {
      return CsMakeString(fun.pvm, pre->get_match(n));
    }
  };

  /* CSF_string_replace - string method 'replace(pattern, replacement)'.
     pattern     - a string (every occurrence is replaced via replace_all; the
                   replacement must then be a string too) or a RegExp object
     replacement - for a RegExp pattern: a string, expanded for each match with
                   wregexp::substitute(), or a function that receives the
                   matches as arguments and whose result is converted to a string
     Only the first match is replaced unless the expression has m_global set.
     Returns the new string, or the (stringified) receiver when nothing matched.
     Reports CsErrRegexpError for an unusable pattern and
     CsErrUnexpectedTypeError for an unusable replacement. */
  value CSF_string_replace(VM *c) {
    value obj;
    value pat;
    value rep;
    CsParseArguments(c, "V*VV", &obj, &pat, &rep);

    obj                = CsToString(c, obj);
    tool::ustring test = value_to_string(obj);

    if (CsStringP(pat)) {
      if (!CsStringP(rep))
        CsThrowKnownError(c, CsErrUnexpectedTypeError, rep, "string");

      if (test.replace_all(CsStringChars(pat), CsStringChars(rep)))
        return string_to_value(c, test);
      else
        return obj;
    }

    if (!CsRegExpP(c, pat))
      CsThrowKnownError(c, CsErrRegexpError,
                        "first parameter is not a RE object");

    tool::wregexp *pre = RegExpValue(c, pat);
    if (!pre) CsThrowKnownError(c, CsErrRegexpError, "wrong RE object");

    bool g = pre->m_global;

    pre->m_test.clear();

    if (!pre->exec(test)) return obj;

    if (CsMethodP(rep)) {
      int start = 0;
      int end   = 0;

      array<wchar> out;
      out.reserve(test.length());

      replace_fun cb(c, rep, pre);

      while (true) {
        // copy the text between the previous match and this one, then the
        // text produced by the callback
        end = pre->get_match_start(0);
        out.push(test()(start, end));
        out.push(cb.call()());
        start = pre->get_match_end(0);
        end   = test.size();
        if (!g) break;
        if (!pre->exec(tool::wregexp::NEXT_CHUNK())) break;
      }
      // tail after the last match
      out.push(test()(start, end));
      return CsMakeString(c, out());
    } else if (CsStringP(rep)) {
      tool::ustring reps = value_to_string(rep);

      int start = 0;
      int end   = 0;

      array<wchar> out;
      out.reserve(test.length());

      while (true) {
        end = pre->get_match_start(0);
        // Both positions may legitimately equal test.size(): a match can sit
        // at the very end of the input (or the input can be empty), so the
        // bounds are inclusive.
        assert(start >= 0 && start <= test.size());
        assert(end >= 0 && end <= test.size());
        assert(start <= end);
        out.push(test()(start, end));
        out.push(pre->substitute(reps)());
        start = pre->get_match_end(0);
        end   = test.size();

        if (!g) break;
        if (!pre->exec(tool::wregexp::NEXT_CHUNK())) break;
      }
      // tail after the last match
      out.push(test()(start, end));
      return CsMakeString(c, out());
    }
    else {
      // with a RegExp pattern both strings and functions are accepted
      CsThrowKnownError(c, CsErrUnexpectedTypeError, rep, "string or function");
      return UNDEFINED_VALUE;
    }
  }

  /* CSF_string_split - string method 'split(separator [, maxn])'.
     Converts the receiver to a string, cuts it into slices at every
     occurrence of the separator (a RegExp object or a string) and returns the
     slices as a vector of strings. Any other separator type is reported
     through CsTypeError.
     NOTE(review): maxn (default 0x80000) is only consulted in the RegExp
     branch; the string branch never reads it - confirm whether that is
     intended. */
  value CSF_string_split(VM *c) {
    value obj;
    value pat;
    int   maxn = 0x80000;
    CsParseArguments(c, "V*V|i", &obj, &pat, &maxn);

    obj                = CsToString(c, obj);
    tool::ustring t    = value_to_string(obj);
    tool::wchars  test = t;

    // start/end delimit the slice currently being collected; every slice is a
    // range of pointers into 'test' (taken from 't' above)
    const wchar *             start = test.start;
    const wchar *             end   = start;
    tool::array<tool::wchars> slices;

    if (CsRegExpP(c, pat)) {
      tool::wregexp *pre = RegExpValue(c, pat);
      if (!pre) CsThrowKnownError(c, CsErrRegexpError, "wrong RE object");

      // bool g = pre->m_global;
      // search from the beginning of the input
      pre->m_next_index = 0;
      // pre->m_global = true;
      if (pre->exec_all(test)) {
        // use at most maxn of the matches as cut points
        maxn = min(maxn, pre->get_number_of_matches());
        for (int i = 0; i < maxn; ++i) {
          // slice = text between the previous cut point and this match
          end = test.start + pre->get_match_position(i).begin;
          slices.push(tool::wchars::range(start, end));
          start = test.start + pre->get_match_position(i).end;
          // when m_has_only_group is set the matched text itself is emitted as well
          if(pre->m_has_only_group)
            slices.push(tool::wchars::range(test.start + pre->get_match_position(i).begin,
                        test.start + pre->get_match_position(i).end));
        }
        // remainder after the last cut point used
        end = test.end();
        slices.push(tool::wchars::range(start, end));
      } else
        // no match at all: the whole input is the only slice
        slices.push(test);
      // pre->m_global = g;
    } else if (CsStringP(pat)) {
      tool::ustring t1   = value_to_string(pat);
      tool::wchars  spat = t1;

      if (spat.length) {
        // non-empty separator: chop() is called repeatedly on 'test' and each
        // returned span is recorded; the loop ends after the span for which
        // chop() reports the separator as not found
        while (true) {
          bool   found;
          wchars span = test.chop(spat, found);
          slices.push(span);
          if (!found) break;
        }
      } else {
        // split onto UTF16 code units
        // NOTE(review): each slice covers whatever a single u16::getc() call
        // consumes from 'test' - presumably one character, which may be more
        // than one UTF-16 unit; the loop also stops when getc() yields 0
        while (test.length) {
          const wchar *s = test.start;
          if (!u16::getc(test)) break;
          slices.push(tool::wchars::range(s, test.start));
        }
      }

      /*for(int i = 0; *start && i < maxn; ++i )
      {
        end = str_str(start,spat.start);
        if(end == 0) break;
        slices.push( tool::wchars::range( start, end ) );
        start = end + spat.length;
      }
      end = test.end();
      slices.push( tool::wchars::range( start, end ) );
      */
    } else
      CsTypeError(c, pat);

    // build the result; the vector is kept on the VM stack and re-read with
    // CsTop() for every store while the fragment strings are being created
    // (presumably so it stays valid across those allocations - confirm)
    value vec = CsMakeVector(c, slices.size());
    CsPush(c, vec);
    for (int i = 0; i < slices.size(); ++i) {
      value frag = CsMakeCharString(c, slices[i].start, slices[i].size());
      CsSetVectorElement(c, CsTop(c), i, frag);
    }
    return CsPop(c);
  }

} // namespace tis
