(*
 * This file is part of Barista.
 * Copyright (C) 2007-2014 Xavier Clerc.
 *
 * Barista is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Barista is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

IFNDEF USE_JDK THEN

let (+=) = Utils.(+=)

type t = CamomileLibrary.UTF8.t

BARISTA_ERROR =
  | Unable_to_convert_to_bytes of (str : t) ->
      Printf.sprintf "unable to convert to bytes (%S)" str
  | Unable_to_convert_from_bytes of (bytes : Bytes.t) ->
      Printf.sprintf "unable to convert from bytes (%S)"
        (Bytes.to_string bytes)
  | Unable_to_convert_to_utf8 of (str : string) ->
      Printf.sprintf "unable to convert to UTF8 (%S)" str
  | Unable_to_convert_from_utf8 of (str : t) ->
      Printf.sprintf "unable to convert from UTF8 (%S)" str
  | Invalid_index of (idx : int) * (len : int) ->
      Printf.sprintf "invalid index (%d, %d)" idx len
  | Invalid_substring of (idx1 : int) * (idx2 : int) * (len : int) ->
      Printf.sprintf "invalid substring (%d, %d, %d)" idx1 idx2 len
  | Invalid_escaped_string of (str : t) ->
      Printf.sprintf "invalid escaped string (%S)" str

external camomile_of_uchar : UChar.t -> CamomileLibrary.UChar.t =
  "%identity"

external uchar_of_camomile : CamomileLibrary.UChar.t -> UChar.t =
  "%identity"

external string_of_bytes : Bytes.t -> string =
  "%identity"

external bytes_of_string : string -> Bytes.t =
  "%identity"

let make_of_list l =
  let res = CamomileLibrary.UTF8.Buf.create (List.length l) in
  List.iter
    (fun ch ->
      ch
      |> camomile_of_uchar
      |> CamomileLibrary.UTF8.Buf.add_char res)
    l;
  CamomileLibrary.UTF8.Buf.contents res

let to_string str =
  try
    let res = Buffer.create (String.length str) in
    CamomileLibrary.UTF8.iter
      (fun ch ->
        ch
        |> CamomileLibrary.UChar.char_of
        |> Buffer.add_char res) str;
    Buffer.contents res
  with _ ->
    fail (Unable_to_convert_from_utf8 str)

let to_string_noerr str =
  try
    to_string str
  with _ ->
    "???"

let of_string str =
  try
    let len = String.length str in
    CamomileLibrary.UTF8.init
      len
      (fun idx -> CamomileLibrary.UChar.of_char str.[idx])
  with _ ->
    fail (Unable_to_convert_to_utf8 str)

let to_bytes str =
  try
    CamomileLibrary.UTF8.validate str;
    let res = ByteBuffer.make_of_size (CamomileLibrary.UTF8.length str) in
    CamomileLibrary.UTF8.iter
      (fun ch ->
        let add_char ch =
          ByteBuffer.add_byte res ch in
        let code = CamomileLibrary.UChar.uint_code ch in
        if (code >= 0x0001) && (code <= 0x007F) then
          add_char code
        else if (code = 0x0000) || ((code >= 0x0080) && (code <= 0x07FF)) then begin
          add_char ((0x00C0 lor (code lsr 6)));
          add_char ((0x0080 lor (code land 0x003F)))
        end else if (code >= 0x0800) && (code <= 0xFFFF) then begin
          add_char ((0x00E0 lor (code lsr 12)));
          add_char ((0x0080 lor ((code lsr 6) land 0x003F)));
          add_char ((0x0080 lor (code land 0x003F)))
        end else begin
          add_char 0x00ED;
          add_char ((0x00A0 lor (((code lsr 16) - 1) land 0x000F)));
          add_char ((0x0080 lor ((code lsr 10) land 0x003F)));
          add_char 0x00ED;
          add_char ((0x00B0 lor ((code lsr 6) land 0x000F)));
          add_char ((0x0080 lor (code land 0x003F)))
        end)
      str;
    ByteBuffer.contents res
  with _ ->
    fail (Unable_to_convert_to_bytes str)

let of_bytes bytes =
  let bytes = string_of_bytes bytes in
  try
    let len = String.length bytes in
    let res = CamomileLibrary.UTF8.Buf.create len in
    let add_char x =
      CamomileLibrary.UTF8.Buf.add_char res (CamomileLibrary.UChar.chr x) in
    let i = ref 0 in
    while !i < len do
      let code = int_of_char bytes.[!i] in
      i += 1;
      if (code land 0x0080) = 0 then
        add_char code
      else if (code land 0x00E0) = 0x00C0 then begin
        let code'1 = int_of_char bytes.[!i] in
        i += 1;
        assert ((code'1 land 0x00C0) = 0x0080);
        let v = ((code land 0x001F) lsl 6) lor (code'1 land 0x003F) in
        add_char v
      end else if (code land 0x00F0) = 0x00E0 then begin
        let code'1 = int_of_char bytes.[!i] in
        let code'2 = int_of_char bytes.[!i + 1] in
        i += 2;
        assert ((code'1 land 0x00C0) = 0x0080);
        assert ((code'2 land 0x00C0) = 0x0080);
        let v = ((code land 0x000F) lsl 12)
            lor ((code'1 land 0x003F) lsl 6)
            lor (code'2 land 0x003F) in
        add_char v
      end else begin
        let code'1 = int_of_char bytes.[!i] in
        let code'2 = int_of_char bytes.[!i + 1] in
        let code'3 = int_of_char bytes.[!i + 2] in
        let code'4 = int_of_char bytes.[!i + 3] in
        let code'5 = int_of_char bytes.[!i + 4] in
        i += 5;
        assert (code = 0x00ED);
        assert ((code'1 land 0x00F0) = 0x00A0);
        assert ((code'2 land 0x00C0) = 0x0080);
        assert (code'3 = 0x00ED);
        assert ((code'4 land 0x00F0) = 0x00B0);
        assert ((code'5 land 0x00C0) = 0x0080);
        let v = 0x010000
            lor ((code'1 land 0x000F) lsl 16)
            lor ((code'2 land 0x003F) lsl 10)
            lor ((code'4 land 0x000F) lsl 6)
            lor (code'5 land 0x003F) in
        add_char v
      end
    done;
    let res = CamomileLibrary.UTF8.Buf.contents res in
    CamomileLibrary.UTF8.validate res;
    res
  with _ ->
    fail (Unable_to_convert_from_bytes (bytes_of_string bytes))

module Encoding =
  CamomileLibrary.CharEncoding.Configure (CamomileLibraryDefault.Config)

let to_latin1 str =
  Encoding.(recode_string ~in_enc:utf8 ~out_enc:latin1 str)
  |> bytes_of_string

let of_latin1 bytes =
  let bytes = string_of_bytes bytes in
  Encoding.(recode_string ~in_enc:latin1 ~out_enc:utf8 bytes)

let of_char ch =
  try
    let res = CamomileLibrary.UTF8.Buf.create 4 in
    CamomileLibrary.UTF8.Buf.add_char
      res
      (camomile_of_uchar (UChar.of_char ch));
    CamomileLibrary.UTF8.Buf.contents res
  with _ ->
    fail (Unable_to_convert_to_utf8 (Char.escaped ch))

let of_uchar ch =
  CamomileLibrary.UTF8.init 1 (fun _ -> camomile_of_uchar ch)

let length str =
  CamomileLibrary.UTF8.length str

let get str idx =
  try
    CamomileLibrary.UTF8.get str idx
    |> uchar_of_camomile
  with _ ->
    fail (Invalid_index (idx, length str))

let equal str1 str2 =
  (str1 == str2) || ((CamomileLibrary.UTF8.compare str1 str2) = 0)

let compare str1 str2 =
  if str1 == str2 then
    0
  else
    CamomileLibrary.UTF8.compare str1 str2

let hash str =
  let res = ref 0 in
  CamomileLibrary.UTF8.iter
    (fun ch ->
      res := 17 * !res + (CamomileLibrary.UChar.code ch))
    str;
  !res

let gen_index_from update_utf8_index update_int_index =
  fun s i c ->
    let c = camomile_of_uchar c in
    let idx = ref (CamomileLibrary.UTF8.move s (CamomileLibrary.UTF8.first s) i) in
    let res = ref i in
    while (not (CamomileLibrary.UTF8.out_of_range s !idx))
        && (not (CamomileLibrary.UChar.eq c (CamomileLibrary.UTF8.look s !idx))) do
      idx := update_utf8_index s !idx;
      update_int_index res
    done;
    if CamomileLibrary.UTF8.out_of_range s !idx then
      raise Not_found
    else
      !res

let index_from str idx ch =
  gen_index_from CamomileLibrary.UTF8.next incr str idx ch

let rindex_from str idx ch =
  gen_index_from CamomileLibrary.UTF8.prev decr str idx ch

let substring str first last =
  let len = Utils.max_int 0 (last - first + 1) in
  try
    let res = CamomileLibrary.UTF8.Buf.create len in
    let idx = ref (CamomileLibrary.UTF8.move str (CamomileLibrary.UTF8.first str) first) in
    let i = ref 0 in
    while !i < len do
      if CamomileLibrary.UTF8.out_of_range str !idx then
        fail (Invalid_substring (first, last, length str));
      CamomileLibrary.UTF8.Buf.add_char res (CamomileLibrary.UTF8.look str !idx);
      idx := CamomileLibrary.UTF8.next str !idx;
      incr i
    done;
    CamomileLibrary.UTF8.Buf.contents res
  with _ ->
    fail (Invalid_substring (first, last, length str))

let (++) str1 str2 =
  let len = (CamomileLibrary.UTF8.length str1) + (CamomileLibrary.UTF8.length str2) in
  let res = CamomileLibrary.UTF8.Buf.create len in
  CamomileLibrary.UTF8.Buf.add_string res str1;
  CamomileLibrary.UTF8.Buf.add_string res str2;
  CamomileLibrary.UTF8.Buf.contents res

let concat l =
  let len =
    List.fold_left
      (fun acc elem -> acc + (CamomileLibrary.UTF8.length elem))
      0
      l in
  let res = CamomileLibrary.UTF8.Buf.create len in
  List.iter (CamomileLibrary.UTF8.Buf.add_string res) l;
  CamomileLibrary.UTF8.Buf.contents res

let concat_sep sep l =
  let sep_len = CamomileLibrary.UTF8.length sep in
  let len =
    List.fold_left
      (fun acc elem -> acc + sep_len + (CamomileLibrary.UTF8.length elem))
      0
      l in
  let len = Utils.max_int 0 (len - sep_len) in
  let res = CamomileLibrary.UTF8.Buf.create len in
  (match l with
  | hd :: tl ->
      CamomileLibrary.UTF8.Buf.add_string res hd;
      List.iter
        (fun x ->
          CamomileLibrary.UTF8.Buf.add_string res sep;
          CamomileLibrary.UTF8.Buf.add_string res x)
        tl
  | [] -> ());
  CamomileLibrary.UTF8.Buf.contents res

let concat_sep_map sep f l =
  List.map f l
  |> concat_sep sep

let concat_sep_map_last sep f1 f2 l =
  (match List.rev l with
  | hd :: tl -> (f2 hd) :: (List.map f1 tl)
  | [] -> [])
  |> List.rev
  |> concat_sep sep

let replace ch1 ch2 str =
  let ch1 = camomile_of_uchar ch1 in
  let ch2 = camomile_of_uchar ch2 in
  let res = CamomileLibrary.UTF8.Buf.create (CamomileLibrary.UTF8.length str) in
  CamomileLibrary.UTF8.iter
    (fun ch ->
      CamomileLibrary.UTF8.Buf.add_char
        res
        (if CamomileLibrary.UChar.eq ch ch1 then
          ch2
        else
          ch))
    str;
  CamomileLibrary.UTF8.Buf.contents res

let contains ch str =
  try
    ignore (index_from str 0 ch);
    true
  with _ ->
    false

let split ch str =
  let res = ref [] in
  let len = CamomileLibrary.UTF8.length str in
  let curr = CamomileLibrary.UTF8.Buf.create len in
  let ch = camomile_of_uchar ch in
  CamomileLibrary.UTF8.iter
    (fun x ->
      if CamomileLibrary.UChar.eq ch x then begin
        res := (CamomileLibrary.UTF8.Buf.contents curr) :: !res;
        CamomileLibrary.UTF8.Buf.clear curr
      end else
        CamomileLibrary.UTF8.Buf.add_char curr x)
    str;
  let last = CamomileLibrary.UTF8.Buf.contents curr in
  if CamomileLibrary.UTF8.length last > 0 then
    res := last :: !res;
  List.rev !res

type split_state =
  | Outside_quotes
  | Inside_simple_quotes
  | Inside_double_quotes

let double_quote = camomile_of_uchar (UChar.of_char '"')

let simple_quote = camomile_of_uchar (UChar.of_char '\'')

let split_quotes ch str =
  let state = ref Outside_quotes in
  let res = ref [] in
  let len = CamomileLibrary.UTF8.length str in
  let curr = CamomileLibrary.UTF8.Buf.create len in
  let ch = camomile_of_uchar ch in
  CamomileLibrary.UTF8.iter
    (fun x ->
      if CamomileLibrary.UChar.eq ch x then begin
        match !state with
        | Outside_quotes ->
            res := (CamomileLibrary.UTF8.Buf.contents curr) :: !res;
            CamomileLibrary.UTF8.Buf.clear curr
        | Inside_simple_quotes
        | Inside_double_quotes ->
            CamomileLibrary.UTF8.Buf.add_char curr x
      end else if CamomileLibrary.UChar.eq simple_quote x then begin
        match !state with
        | Outside_quotes -> state := Inside_simple_quotes
        | Inside_simple_quotes -> state := Outside_quotes
        | Inside_double_quotes -> CamomileLibrary.UTF8.Buf.add_char curr x
      end else if CamomileLibrary.UChar.eq double_quote x then begin
        match !state with
        | Outside_quotes -> state := Inside_double_quotes
        | Inside_simple_quotes -> CamomileLibrary.UTF8.Buf.add_char curr x
        | Inside_double_quotes -> state := Outside_quotes
      end else
        CamomileLibrary.UTF8.Buf.add_char curr x)
    str;
  let last = CamomileLibrary.UTF8.Buf.contents curr in
  if CamomileLibrary.UTF8.length last > 0 then
    res := last :: !res;
  List.rev !res

let empty_string = CamomileLibrary.UTF8.init 0 (fun _ -> assert false)

let trim str =
  let len = CamomileLibrary.UTF8.length str in
  let i = ref 0 in
  while !i < len && UChar.is_whitespace (get str !i) do
    incr i
  done;
  let j = ref (len - 1) in
  while !j >= !i && UChar.is_whitespace (get str !j) do
    decr j
  done;
  if !i = 0 && !j = len - 1 then
    str
  else if !j >= !i then
    substring str !i (!j - !i + 1)
  else
    empty_string

let starts_with prefix str =
  if (length str) >= (length prefix) then begin
    let idx_prefix = ref (CamomileLibrary.UTF8.first prefix) in
    let idx_str = ref (CamomileLibrary.UTF8.first str) in
    while (not (CamomileLibrary.UTF8.out_of_range prefix !idx_prefix))
        && (CamomileLibrary.UChar.eq
              (CamomileLibrary.UTF8.get prefix !idx_prefix)
              (CamomileLibrary.UTF8.get str !idx_str)) do
      idx_prefix := CamomileLibrary.UTF8.next prefix !idx_prefix;
      idx_str := CamomileLibrary.UTF8.next str !idx_str;
    done;
    CamomileLibrary.UTF8.out_of_range prefix !idx_prefix
  end else
    false

let ends_with suffix str =
  if (length str) >= (length suffix) then begin
    let idx_suffix = ref (CamomileLibrary.UTF8.last suffix) in
    let idx_str = ref (CamomileLibrary.UTF8.last str) in
    while (not (CamomileLibrary.UTF8.out_of_range suffix !idx_suffix))
        && (CamomileLibrary.UChar.eq
              (CamomileLibrary.UTF8.get suffix !idx_suffix)
              (CamomileLibrary.UTF8.get str !idx_str)) do
      idx_suffix := CamomileLibrary.UTF8.prev suffix !idx_suffix;
      idx_str := CamomileLibrary.UTF8.prev str !idx_str;
    done;
    CamomileLibrary.UTF8.out_of_range suffix !idx_suffix
  end else
    false

external is_printable : char -> bool =
  "caml_is_printable"

let double_quote = UChar.of_char '"'

let simple_quote = UChar.of_char '\''

let back_slash = UChar.of_char '\\'

let lowercase_n = UChar.of_char 'n'

let lowercase_t = UChar.of_char 't'

let lowercase_u = UChar.of_char 'u'

let new_line = CamomileLibrary.UChar.of_char '\n'

let tabulation = CamomileLibrary.UChar.of_char '\t'

let escape_delim delim str =
  let len_twice = (CamomileLibrary.UTF8.length str) * 2 in
  let res = CamomileLibrary.UTF8.Buf.create len_twice in
  let add_char x =
    CamomileLibrary.UTF8.Buf.add_char res (camomile_of_uchar x) in
  let add_string x =
    CamomileLibrary.UTF8.Buf.add_string res (of_string x) in
  add_char delim;
  CamomileLibrary.UTF8.iter
    (fun c ->
      let code = CamomileLibrary.UChar.uint_code c in
      if code > 0x7F || code < 0 then
        (* from Camomile's UPervasives module *)
        let code'1 = code land 0x0000FFFF in
        let code'2 = code lsr 16 in
        if code'2 = 0 then
          add_string (Printf.sprintf "\\u%04X" code'1)
        else
          add_string (Printf.sprintf "\\U%04X%04X" code'2 code'1)
      else
        (* from stdlib's Char/String modules *)
        match UChar.to_char (uchar_of_camomile c) with
        | '"' ->
            add_char back_slash;
            add_char double_quote
        | '\\' ->
            add_char back_slash;
            add_char back_slash
        | '\n' ->
            add_char back_slash;
            add_char lowercase_n
        | '\t' ->
            add_char back_slash;
            add_char lowercase_t
        | ch ->
            if is_printable ch then
              add_char (UChar.of_char ch)
            else
              let cc = Char.code ch in
              add_char back_slash;
              add_char (UChar.of_char (Char.chr (48 + cc / 100)));
              add_char (UChar.of_char (Char.chr (48 + (cc / 10) mod 10)));
              add_char (UChar.of_char (Char.chr (48 + cc mod 10))))
    str;
  add_char delim;
  CamomileLibrary.UTF8.Buf.contents res

let escape str =
  escape_delim double_quote str

let escape_char ch =
  escape_delim simple_quote (of_uchar ch)

let unescape str =
  let len = CamomileLibrary.UTF8.length str in
  let double_quote' = camomile_of_uchar double_quote in
  let back_slash' = camomile_of_uchar back_slash in
  if (len < 2)
  || (not (CamomileLibrary.UChar.eq (CamomileLibrary.UTF8.get str 0) double_quote'))
  || (not (CamomileLibrary.UChar.eq (CamomileLibrary.UTF8.get str (pred len)) double_quote')) then
    fail (Invalid_escaped_string str)
  else
    let res = CamomileLibrary.UTF8.Buf.create len in
    let idx = ref (CamomileLibrary.UTF8.nth str 1) in
    let last = CamomileLibrary.UTF8.last str in
    let read_digit hex =
      if CamomileLibrary.UTF8.compare_index str !idx last >= 0 then
        fail (Invalid_escaped_string str)
      else
        let d = Char.uppercase (UChar.to_char (uchar_of_camomile (CamomileLibrary.UTF8.look str !idx))) in
        if (d >= '0' && d <= '9') then
          (Char.code d) - (Char.code '0')
        else if (hex && (d >= 'A' && d <= 'F')) then
          (Char.code d) - (Char.code 'A')
        else fail (Invalid_escaped_string str) in
    while CamomileLibrary.UTF8.compare_index str !idx last < 0 do
      let c = CamomileLibrary.UTF8.look str !idx in
      if UChar.equal back_slash (uchar_of_camomile c) then begin
        idx := CamomileLibrary.UTF8.next str !idx;
        if CamomileLibrary.UTF8.compare_index str !idx last >= 0 then
          fail (Invalid_escaped_string str)
        else
          let c' = uchar_of_camomile (CamomileLibrary.UTF8.look str !idx) in
          if UChar.equal double_quote c' then begin
            CamomileLibrary.UTF8.Buf.add_char res double_quote';
            idx := CamomileLibrary.UTF8.next str !idx
          end else if UChar.equal back_slash c' then begin
            CamomileLibrary.UTF8.Buf.add_char res back_slash';
            idx := CamomileLibrary.UTF8.next str !idx
          end else if UChar.equal lowercase_n c' then begin
            CamomileLibrary.UTF8.Buf.add_char res new_line;
            idx := CamomileLibrary.UTF8.next str !idx
          end else if UChar.equal lowercase_t c' then begin
            CamomileLibrary.UTF8.Buf.add_char res tabulation;
            idx := CamomileLibrary.UTF8.next str !idx
          end else if UChar.equal lowercase_u c' then
            let digit'1 = read_digit true in
            let digit'2 = read_digit true in
            let digit'3 = read_digit true in
            let digit'4 = read_digit true in
            let code =
              (digit'1 lsl 12)
                + (digit'2 lsl 8)
                + (digit'3 lsl 4)
                + digit'4 in
            CamomileLibrary.UTF8.Buf.add_char res (CamomileLibrary.UChar.chr_of_uint code)
          else if UChar.equal (UChar.of_char 'U') c' then
            let digit'1 = read_digit true in
            let digit'2 = read_digit true in
            let digit'3 = read_digit true in
            let digit'4 = read_digit true in
            let digit'5 = read_digit true in
            let digit'6 = read_digit true in
            let digit'7 = read_digit true in
            let digit'8 = read_digit true in
            let code =
              (digit'1 lsl 28)
                + (digit'2 lsl 24)
                + (digit'3 lsl 20)
                + (digit'4 lsl 16)
                + (digit'5 lsl 12)
                + (digit'6 lsl 8)
                + (digit'7 lsl 4)
                + digit'8 in
            CamomileLibrary.UTF8.Buf.add_char res (CamomileLibrary.UChar.chr_of_uint code)
          else (* digits *)
            let digit'1 = Char.code (UChar.to_char c') in
            idx := CamomileLibrary.UTF8.next str !idx;
            let digit'2 = read_digit false in
            let digit'3 = read_digit false in
            let code =
              ((digit'1 - 48) * 100)
                + ((digit'2 - 48) * 10)
                + (digit'3 - 48) in
            CamomileLibrary.UTF8.Buf.add_char res (camomile_of_uchar (UChar.of_char (Char.chr code)))
      end else begin
        CamomileLibrary.UTF8.Buf.add_char res c;
        idx := CamomileLibrary.UTF8.next str !idx
      end
    done;
    CamomileLibrary.UTF8.Buf.contents res

type t' = t

module HashedType = struct

  type t = t'

  let equal str1 str2 = equal str1 str2

  let hash str = hash str

end

module OrderedType = struct

  type t = t'

  let compare str1 str2 = compare str1 str2

end

module Hashtbl = Hashtbl.Make (HashedType)

module Map = Map.Make (OrderedType)

module Set = Set.Make (OrderedType)

ELSE (* USE_JDK *)

type t = java'lang'String java_instance

BARISTA_ERROR =
  | Unable_to_convert_to_bytes of (str : t) ->
      Printf.sprintf "unable to convert to bytes (%S)"
        (JavaString.to_string str)
  | Unable_to_convert_from_bytes of (bytes : Bytes.t) ->
      Printf.sprintf "unable to convert from bytes (%s)"
        (Bytes.to_string bytes)
  | Unable_to_convert_to_utf8 of (str : string) ->
      Printf.sprintf "unable to convert to UTF8 (%S)" str
  | Unable_to_convert_from_utf8 of (str : t) ->
      Printf.sprintf "unable to convert from UTF8 (%S)"
        (JavaString.to_string str)
  | Invalid_index of (idx : int) * (len : int) ->
      Printf.sprintf "invalid index (%d, %d)" idx len
  | Invalid_substring of (idx1 : int) * (idx2 : int) * (len : int) ->
      Printf.sprintf "invalid substring (%d, %d, %d)" idx1 idx2 len
  | Invalid_escaped_string of (str : t) ->
      Printf.sprintf "invalid escaped string (%S)"
        (JavaString.to_string str)

external bytes_of_byte_array : int JavaByteArray.t -> Bytes.t =
  "%identity"

external byte_array_of_bytes : Bytes.t -> int JavaByteArray.t =
  "%identity"

external code_point_of_uchar : UChar.t -> int32 =
  "%identity"

external uchar_of_code_point : int32 -> UChar.t =
  "%identity"

external of_string_prim : string -> _'String java_instance =
  "ocamljava_javastring_of_string"

external to_string_prim : _'String java_instance -> string =
  "ocamljava_javastring_to_string"

let make_of_list l =
  let res = Java.make "StringBuilder(int)" (Int32.of_int (List.length l)) in
  (List.fold_left
     (fun acc elem ->
       Java.call "StringBuilder.appendCodePoint(_):StringBuilder"
         acc
         (code_point_of_uchar elem))
     res
     l)
  |> Java.call "StringBuilder.toString()"

let to_string str =
  try
    to_string_prim str
  with Java_exception _ ->
    fail (Unable_to_convert_from_utf8 str)

let to_string_noerr str =
  try
    to_string_prim str
  with Java_exception _ ->
    "???"

let of_string str =
  try
    of_string_prim str
  with Java_exception _ ->
    fail (Unable_to_convert_to_utf8 str)

let remove_leading_length bytes =
  let len = JavaByteArray.length bytes in
  let res = Java.make_array "byte[]" (Int32.sub len 2l) in
  Java.call "System.arraycopy(_,_,_,_,_)"
    (JavaByteArray.to_object bytes)
    2l
    (JavaByteArray.to_object res)
    0l
    (Int32.sub len 2l);
  res

let to_bytes_short str =
  let len = Java.call "String.length()" str in
  let baos = Java.make "java.io.ByteArrayOutputStream(_)" (Int32.add len len) in
  let dos = Java.make "java.io.DataOutputStream(_)" baos in
  Java.call "java.io.DataOutputStream.writeUTF(_)" dos str;
  Java.call "java.io.ByteArrayOutputStream.toByteArray()" baos
  |> remove_leading_length
  |> bytes_of_byte_array

let to_bytes_long str =
  try
    let len = Java.call "String.length()" str in
    let res = ByteBuffer.make_of_size (Int32.to_int len) in
    let add_char ch = ByteBuffer.add_byte res ch in
    let i = ref 0l in
    while !i < len do
      let code = Int32.to_int (Java.call "String.codePointAt(_)" str !i) in
      if (code >= 0x0001) && (code <= 0x007F) then
        add_char code
      else if (code = 0x0000) || ((code >= 0x0080) && (code <= 0x07FF)) then begin
        add_char ((0x00C0 lor (code lsr 6)));
        add_char ((0x0080 lor (code land 0x003F)))
      end else if (code >= 0x0800) && (code <= 0xFFFF) then begin
        add_char ((0x00E0 lor (code lsr 12)));
        add_char ((0x0080 lor ((code lsr 6) land 0x003F)));
        add_char ((0x0080 lor (code land 0x003F)))
      end else begin
        add_char 0x00ED;
        add_char ((0x00A0 lor (((code lsr 16) - 1) land 0x000F)));
        add_char ((0x0080 lor ((code lsr 10) land 0x003F)));
        add_char 0x00ED;
        add_char ((0x00B0 lor ((code lsr 6) land 0x000F)));
        add_char ((0x0080 lor (code land 0x003F)))
      end;
      i := Int32.succ !i
    done;
    ByteBuffer.contents res
  with _ ->
    fail (Unable_to_convert_to_bytes str)

let to_bytes str =
  try
    to_bytes_short str
  with
  | Java_exception e when Java.instanceof "java.io.UTFDataFormatException" e ->
      to_bytes_long str
  | Java_exception _ ->
      fail (Unable_to_convert_to_bytes str)

let prepend_with_size bytes =
  let len = JavaByteArray.length bytes in
  let res = Java.make_array "byte[]" (Int32.add len 2l) in
  JavaByteArray.set
    res
    0l
    Int32.(to_int (logand
                     (shift_right_logical len 8)
                     255l));
  JavaByteArray.set
    res
    1l
    Int32.(to_int (logand len 255l));
  Java.call "System.arraycopy(_,_,_,_,_)"
    (JavaByteArray.to_object bytes)
    0l
    (JavaByteArray.to_object res)
    2l
    len;
  res

let of_bytes_short bytes =
  bytes
  |> prepend_with_size
  |> Java.make "java.io.ByteArrayInputStream(_)"
  |> Java.make "java.io.DataInputStream(_)"
  |> Java.call "java.io.DataInputStream.readUTF()"

let of_bytes_long bytes =
  let len = JavaByteArray.length bytes in
  let res = Java.make "StringBuilder(int)" len in
  let add_char x =
    x
    |> Int32.of_int
    |> Java.call "StringBuilder.appendCodePoint(_):StringBuilder" res
    |> ignore in
  let i = ref 0l in
  while !i < len do
    let code = JavaByteArray.get bytes !i in
    i := Int32.add !i 1l;
    if (code land 0x0080) = 0 then
      add_char code
    else if (code land 0x00E0) = 0x00C0 then begin
      let code'1 = JavaByteArray.get bytes !i in
      i := Int32.add !i 1l;
      assert ((code'1 land 0x00C0) = 0x0080);
      let v = ((code land 0x001F) lsl 6) lor (code'1 land 0x003F) in
      add_char v
    end else if (code land 0x00F0) = 0x00E0 then begin
      let code'1 = JavaByteArray.get bytes !i in
      let code'2 = JavaByteArray.get bytes (Int32.succ !i) in
      i := Int32.add !i 2l;
      assert ((code'1 land 0x00C0) = 0x0080);
      assert ((code'2 land 0x00C0) = 0x0080);
      let v = ((code land 0x000F) lsl 12)
          lor ((code'1 land 0x003F) lsl 6)
          lor (code'2 land 0x003F) in
      add_char v
    end else begin
      let code'1 = JavaByteArray.get bytes !i in
      let code'2 = JavaByteArray.get bytes (Int32.add !i 1l) in
      let code'3 = JavaByteArray.get bytes (Int32.add !i 2l) in
      let code'4 = JavaByteArray.get bytes (Int32.add !i 3l) in
      let code'5 = JavaByteArray.get bytes (Int32.add !i 4l) in
      i := Int32.add !i 5l;
      assert (code = 0x00ED);
      assert ((code'1 land 0x00F0) = 0x00A0);
      assert ((code'2 land 0x00C0) = 0x0080);
      assert (code'3 = 0x00ED);
      assert ((code'4 land 0x00F0) = 0x00B0);
      assert ((code'5 land 0x00C0) = 0x0080);
      let v = 0x010000
          lor ((code'1 land 0x000F) lsl 16)
          lor ((code'2 land 0x003F) lsl 10)
          lor ((code'4 land 0x000F) lsl 6)
          lor (code'5 land 0x003F) in
      add_char v
    end
  done;
  Java.call "StringBuilder.toString()" res

let of_bytes bytes =
  let b = byte_array_of_bytes bytes in
  if (JavaByteArray.length b) <= 65535l then
    try
      of_bytes_short b
    with Java_exception _ ->
      fail (Unable_to_convert_from_bytes bytes)
  else
    try
      of_bytes_long b
    with _ ->
      fail (Unable_to_convert_from_bytes bytes)

let to_latin1 str =
  Java.call "String.getBytes(java.nio.charset.Charset)"
    str
    (Java.get "java.nio.charset.StandardCharsets.ISO_8859_1" ())
    |> bytes_of_byte_array

let of_latin1 bytes =
  Java.make "String(_,java.nio.charset.Charset)"
    (byte_array_of_bytes bytes)
    (Java.get "java.nio.charset.StandardCharsets.ISO_8859_1" ())

let of_char ch =
  ch
  |> Char.code
  |> Java.call "Character.toString(_)"

let of_uchar ch =
  let res = Java.make "StringBuilder(int)" 1l in
  ch
  |> code_point_of_uchar
  |> Java.call "StringBuilder.appendCodePoint(_):StringBuilder" res
  |> Java.call "StringBuilder.toString()"

let length str =
  str
  |> Java.call "String.length()"
  |> Int32.to_int

let get str idx =
  try
    Java.call "String.codePointAt(_)" str (Int32.of_int idx)
    |> uchar_of_code_point
  with Java_exception _ ->
    fail (Invalid_index (idx, length str))

let equal str1 str2 =
  (str1 == str2) || (Java.call "String.equals(_)" str1 str2)

let compare str1 str2 =
  if str1 == str2 then
    0
  else
    Java.call "String.compareTo(String)" str1 str2
    |> Int32.to_int

let hash str =
  let len = Java.call "String.length()" str in
  let res = ref 0 in
  let i = ref 0l in
  while (!i < len) do
    res := 17 * !res
        + (Int32.to_int (Java.call "String.codePointAt(_)" str !i));
    i := Int32.succ !i
  done;
  !res

let index_from str idx ch =
  let idx = Int32.of_int idx in
  let ch = code_point_of_uchar ch in
  let res = Java.call "String.indexOf(int,int)" str ch idx in
  if res < 0l then
    raise Not_found
  else
    Int32.to_int res

let rindex_from str idx ch =
  let idx = Int32.of_int idx in
  let ch = code_point_of_uchar ch in
  let res = Java.call "String.lastIndexOf(int,int)" str ch idx in
  if res < 0l then
    raise Not_found
  else
    Int32.to_int res

let substring str first last =
  if last < first then
    Java.make "String()" ()
  else begin
    let len = length str in
    if (first < 0) || (first >= len) || (last < 0) || (last > len) then
      fail (Invalid_substring (first, last, len));
    Java.call "String.substring(_,_)"
      str
      (Int32.of_int first)
      (Int32.of_int (succ last))
  end

let (++) str1 str2 =
  Java.call "String.concat(_)" str1 str2

let concat l =
  let len =
    List.fold_left
      (fun acc elem -> acc + (length elem))
      0
      l in
  let res = Java.make "StringBuilder(int)" (Int32.of_int len) in
  (List.fold_left
     (fun acc elem ->
       Java.call "StringBuilder.append(String):StringBuilder" acc elem)
     res
     l)
  |> Java.call "StringBuilder.toString()"

let concat_sep sep l =
  let sep_len = length sep in
  let len =
    List.fold_left
      (fun acc elem -> acc + sep_len + (length elem))
      0
      l in
  let len = Utils.max_int 0 (len - sep_len) in
  let res = Java.make "StringBuilder(int)" (Int32.of_int len) in
  (match l with
  | hd :: tl ->
      Java.call "StringBuilder.append(String):StringBuilder" res hd
      |> ignore;
      List.iter
        (fun str ->
          Java.call "StringBuilder.append(String):StringBuilder" res sep
          |> ignore;
          Java.call "StringBuilder.append(String):StringBuilder" res str
          |> ignore)
        tl
  | [] -> ());
  Java.call "StringBuilder.toString()" res

let concat_sep_map sep f l =
  List.map f l
  |> concat_sep sep

let concat_sep_map_last sep f1 f2 l =
  (match List.rev l with
  | hd :: tl -> (f2 hd) :: (List.map f1 tl)
  | [] -> [])
  |> List.rev
  |> concat_sep sep

let replace ch1 ch2 str =
  let ch1 = code_point_of_uchar ch1 in
  let ch2 = code_point_of_uchar ch2 in
  let len = Java.call "String.length()" str in
  let res = Java.make "StringBuilder(int)" len in
  let idx = ref 0l in
  while !idx < len do
    let ch = Java.call "String.codePointAt(_)" str !idx in
    if ch = ch1 then
      Java.call "StringBuilder.appendCodePoint(_):StringBuilder" res ch2
      |> ignore
    else
      Java.call "StringBuilder.appendCodePoint(_):StringBuilder" res ch
      |> ignore;
    idx := Int32.succ !idx
  done;
  Java.call "StringBuilder.toString()" res

let contains ch str =
  let ch = code_point_of_uchar ch in
  (Java.call "String.indexOf(int)" str ch) >= 0l

let split ch str =
  let res = ref [] in
  let len = Java.call "String.length()" str in
  let curr = Java.make "StringBuilder(int)" len in
  let ch = code_point_of_uchar ch in
  let idx = ref 0l in
  while (!idx < len) do
    let x = Java.call "String.codePointAt(_)" str !idx in
    if ch = x then begin
      res := (Java.call "StringBuilder.toString()" curr) :: !res;
      Java.call "StringBuilder.setLength(_)" curr 0l;
    end else begin
      Java.call "StringBuilder.appendCodePoint(_):StringBuilder" curr x
      |> ignore
    end;
    idx := Int32.succ !idx
  done;
  let last = Java.call "StringBuilder.toString()" curr in
  if (length last) > 0 then
    res := last :: !res;
  List.rev !res

type split_state =
  | Outside_quotes
  | Inside_simple_quotes
  | Inside_double_quotes

external code_point_of_char : char -> int32 =
  "%int32_of_int"

let double_quote = code_point_of_char '"'

let simple_quote = code_point_of_char '\''

let split_quotes ch str =
  let state = ref Outside_quotes in
  let res = ref [] in
  let len = Java.call "String.length()" str in
  let curr = Java.make "StringBuilder(int)" len in
  let ch = code_point_of_uchar ch in
  let idx = ref 0l in
  while (!idx < len) do
    let x = Java.call "String.codePointAt(_)" str !idx in
    if ch = x then begin
      match !state with
      | Outside_quotes ->
          res := (Java.call "StringBuilder.toString()" curr) :: !res;
          Java.call "StringBuilder.setLength(_)" curr 0l
      | Inside_simple_quotes
      | Inside_double_quotes ->
          Java.call "StringBuilder.appendCodePoint(_):StringBuilder" curr x
          |> ignore
    end else if simple_quote = x then begin
      match !state with
      | Outside_quotes -> state := Inside_simple_quotes
      | Inside_simple_quotes -> state := Outside_quotes
      | Inside_double_quotes ->
          Java.call "StringBuilder.appendCodePoint(_):StringBuilder" curr x
          |> ignore
    end else if double_quote = x then begin
      match !state with
      | Outside_quotes -> state := Inside_double_quotes
      | Inside_simple_quotes ->
          Java.call "StringBuilder.appendCodePoint(_):StringBuilder" curr x
          |> ignore
      | Inside_double_quotes -> state := Outside_quotes
    end else begin
      Java.call "StringBuilder.appendCodePoint(_):StringBuilder" curr x
      |> ignore
    end;
    idx := Int32.succ !idx
  done;
  let last = Java.call "StringBuilder.toString()" curr in
  if (length last) > 0 then
    res := last :: !res;
  List.rev !res

let trim str =
  Java.call "String.trim()" str

let starts_with prefix str =
  Java.call "String.startsWith(_)" str prefix

let ends_with suffix str =
  Java.call "String.endsWith(_)" str suffix

external is_printable : char -> bool =
  "caml_is_printable"

external code_point_of_int : int -> int32 =
  "%int32_of_int"

let back_slash = code_point_of_char '\\'

let lowercase_n = code_point_of_char 'n'

let lowercase_t = code_point_of_char 't'

let lowercase_u = code_point_of_char 'u'

let new_line = code_point_of_char '\n'

let tabulation = code_point_of_char '\t'

let escape_delim delim str =
  let len = Java.call "String.length()" str in
  let res = Java.make "StringBuilder(int)" (Int32.add len len) in
  let add_char x =
    Java.call "StringBuilder.appendCodePoint(_):StringBuilder" res x
    |> ignore in
  let add_string x =
    let x = JavaString.of_string x in
    Java.call "StringBuilder.append(String):StringBuilder" res x
    |> ignore in
  add_char delim;
  let idx = ref 0l in
  while (!idx < len) do
    let code = Int32.to_int (Java.call "String.codePointAt(_)" str !idx) in
      if code > 0x7F || code < 0 then
        (* from Camomile's UPervasives module *)
        let code'1 = code land 0x0000FFFF in
        let code'2 = code lsr 16 in
        if code'2 = 0 then
          add_string (Printf.sprintf "\\u%04X" code'1)
        else
          add_string (Printf.sprintf "\\U%04X%04X" code'2 code'1)
      else begin
        (* from stdlib's Char/String modules *)
        match Char.unsafe_chr code with
        | '"' ->
            add_char back_slash;
            add_char double_quote
        | '\\' ->
            add_char back_slash;
            add_char back_slash
        | '\n' ->
            add_char back_slash;
            add_char lowercase_n
        | '\t' ->
            add_char back_slash;
            add_char lowercase_t
        | ch ->
            if is_printable ch then
              add_char (code_point_of_char ch)
            else
              let cc = Char.code ch in
              add_char back_slash;
              add_char (code_point_of_char (Char.chr (48 + cc / 100)));
              add_char (code_point_of_char (Char.chr (48 + (cc / 10) mod 10)));
              add_char (code_point_of_char (Char.chr (48 + cc mod 10)))
      end;
    idx := Int32.succ !idx
  done;
  add_char delim;
  Java.call "StringBuilder.toString()" res

let escape str =
  escape_delim double_quote str

let escape_char ch =
  escape_delim simple_quote (of_uchar ch)

let unescape str =
  let len = Java.call "String.length()" str in
  if (len < 2l)
  || (not ((Java.call "String.codePointAt(_)" str 0l) = double_quote))
  || (not ((Java.call "String.codePointAt(_)" str (Int32.pred len)) = double_quote)) then
    fail (Invalid_escaped_string str)
  else
    let res = Java.make "StringBuilder(int)" len in
    let idx = ref 1l in
    let last = Int32.pred len in
    let read_digit hex =
      if !idx >= last then
        fail (Invalid_escaped_string str)
      else
        let d = Java.call "String.codePointAt(_)" str !idx in
        let d = Java.call "Character.toUpperCase(int)" d in
        let d = Char.unsafe_chr (Int32.to_int d) in
        if (d >= '0' && d <= '9') then
          (Char.code d) - (Char.code '0')
        else if (hex && (d >= 'A' && d <= 'F')) then
          (Char.code d) - (Char.code 'A')
        else fail (Invalid_escaped_string str) in
    while !idx < last do
      let c = Java.call "String.codePointAt(_)" str !idx in
      if back_slash = c then begin
        idx := Int32.succ !idx;
        if !idx >= last then
          fail (Invalid_escaped_string str)
        else
          let c' = Java.call "String.codePointAt(_)" str !idx in
          if double_quote = c' then begin
            Java.call "StringBuilder.appendCodePoint(_):StringBuilder" res double_quote
            |> ignore;
            idx := Int32.succ !idx
          end else if back_slash = c' then begin
            Java.call "StringBuilder.appendCodePoint(_):StringBuilder" res back_slash
            |> ignore;
            idx := Int32.succ !idx
          end else if lowercase_n = c' then begin
            Java.call "StringBuilder.appendCodePoint(_):StringBuilder" res new_line
            |> ignore;
            idx := Int32.succ !idx
          end else if lowercase_t = c' then begin
            Java.call "StringBuilder.appendCodePoint(_):StringBuilder" res tabulation
            |> ignore;
            idx := Int32.succ !idx
          end else if lowercase_u = c' then
            let digit'1 = read_digit true in
            let digit'2 = read_digit true in
            let digit'3 = read_digit true in
            let digit'4 = read_digit true in
            let code =
              (digit'1 lsl 12)
                + (digit'2 lsl 8)
                + (digit'3 lsl 4)
                + digit'4 in
            Java.call "StringBuilder.appendCodePoint(_):StringBuilder"
              res
              (code_point_of_int code)
            |> ignore
          else if (code_point_of_char 'U') = c' then
            let digit'1 = read_digit true in
            let digit'2 = read_digit true in
            let digit'3 = read_digit true in
            let digit'4 = read_digit true in
            let digit'5 = read_digit true in
            let digit'6 = read_digit true in
            let digit'7 = read_digit true in
            let digit'8 = read_digit true in
            let code =
              (digit'1 lsl 28)
                + (digit'2 lsl 24)
                + (digit'3 lsl 20)
                + (digit'4 lsl 16)
                + (digit'5 lsl 12)
                + (digit'6 lsl 8)
                + (digit'7 lsl 4)
                + digit'8 in
            Java.call "StringBuilder.appendCodePoint(_):StringBuilder"
              res
              (code_point_of_int code)
            |> ignore
          else (* digits *)
            let digit'1 = Int32.to_int c' in
            idx := Int32.succ !idx;
            let digit'2 = read_digit false in
            let digit'3 = read_digit false in
            let code =
              ((digit'1 - 48) * 100)
                + ((digit'2 - 48) * 10)
                + (digit'3 - 48) in
            Java.call "StringBuilder.appendCodePoint(_):StringBuilder"
              res
              (code_point_of_int code)
            |> ignore
      end else begin
        Java.call "StringBuilder.appendCodePoint(_):StringBuilder" res c
        |> ignore;
        idx := Int32.succ !idx
      end
    done;
    Java.call "StringBuilder.toString()" res

type t' = t

module HashedType = struct

  type t = t'

  let equal str1 str2 = equal str1 str2

  let hash str = hash str

end

module OrderedType = struct

  type t = t'

  let compare str1 str2 = compare str1 str2

end

module Hashtbl = Hashtbl.Make (HashedType)

module Map = Map.Make (OrderedType)

module Set = Set.Make (OrderedType)

END
