#!/usr/bin/env python
# -*- coding: utf-8 -*-
# $Id: Encoding.py 11267 2018-11-23 08:23:13Z Kevin $
#
# Copyright (c) 2016 Nuwa Information Co., Ltd, and individual contributors.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   1. Redistributions of source code must retain the above copyright notice,
#      this list of conditions and the following disclaimer.
#
#   2. Redistributions in binary form must reproduce the above copyright
#      notice, this list of conditions and the following disclaimer in the
#      documentation and/or other materials provided with the distribution.
#
#   3. Neither the name of Nuwa Information nor the names of its contributors
#      may be used to endorse or promote products derived from this software
#      without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# $Author: Kevin $ (last)
# $Date: 2018-11-23 17:23:13 +0900 (週五, 23 十一月 2018) $
# $Revision: 11267 $
"""
Encoding related utilities.
"""
import six
import chardet

def utf8(s, encodings=None, throw=True):
    """
    Convert a string (UNICODE or ANSI) to a UTF-8 encoded byte string.

    @param s String. Text is encoded directly; anything else is first
             converted to text by _unicode().
    @param encodings Native encodings for decode. They are tried in order,
                     by trial and error, when s has to be decoded.
    @param throw Raise exception if it fails to convert string.
    @return UTF8 string, or None when the conversion fails and throw is
            False.
    """
    if isinstance(s, six.text_type):
        return s.encode('utf-8')

    text = _unicode(s, encodings=encodings, throw=throw)
    if text is None:
        # _unicode() returns None instead of raising when throw is False and
        # no encoding could decode s. Propagate that instead of failing with
        # an AttributeError on None.encode().
        return None
    return text.encode('utf-8')

import locale

# Fallback encodings tried when decoding byte strings: the locale's encoding
# first (when one is available), then cp950.
try:
    _UNICODE_TRY_ENCODINGS = (locale.getlocale()[1],)
except ValueError:
    # getlocale() raises ValueError for locale names it cannot parse; that
    # must not prevent this module from being imported.
    _UNICODE_TRY_ENCODINGS = ()

# getlocale() reports None when no encoding is set. None is not a usable
# codec name, so keep only real entries.
_UNICODE_TRY_ENCODINGS = tuple(enc for enc in _UNICODE_TRY_ENCODINGS if enc)

# See #2758. for details.
if 'cp950' not in _UNICODE_TRY_ENCODINGS:
    _UNICODE_TRY_ENCODINGS = _UNICODE_TRY_ENCODINGS + ('cp950',)

def _unicode(s, strict=False, encodings=None, throw=True, confidence=0.8):
    """
    Force to UNICODE string (str type in Python 3).

    Text is returned unchanged. Byte strings are decoded by trying, in
    order, the caller's encodings, the encoding guessed by chardet and the
    module's fallback encodings; the first successful decode is returned.

    @param s String. An object that is neither text nor bytes is converted
             with str() on Python 3, or to a byte string (and then decoded)
             on Python 2.
    @param strict Useless, just for backward compatible.
    @param encodings Native encodings for decode. They are tried first, in
                     order, by trial and error. The passed sequence is not
                     modified.
    @param throw Raise the last decoding error if every encoding fails.
    @param confidence chardet confidence above which the detected encoding
                      is tried before the fallback encodings instead of
                      after them.
    @return UNICODE type string, or None when every encoding fails and throw
            is False.
    """
    # This is unicode() in Python 2 and str in Python 3
    if isinstance(s, six.text_type):
        return s

    if not isinstance(s, six.binary_type):
        if not six.PY2:
            return str(s)
        s = six.binary_type(s)

    # Work on a private copy so the caller's sequence is never mutated and
    # any iterable (list, tuple, ...) is accepted.
    candidates = list(encodings) if encodings else []

    try:
        result = chardet.detect(s)
        detected = result['encoding']
        trusted = result['confidence'] > confidence
    except Exception:
        # Detection is best-effort: on any failure rely on the fallback
        # encodings only.
        detected = None
        trusted = False

    if detected and trusted:
        # A confident guess is tried before the fallback encodings.
        candidates.append(detected)
        candidates.extend(_UNICODE_TRY_ENCODINGS)
    else:
        # A weak guess (if any) is only the last resort.
        candidates.extend(_UNICODE_TRY_ENCODINGS)
        if detected:
            candidates.append(detected)

    error = None
    for encoding in candidates:
        try:
            return s.decode(encoding)
        except (LookupError, TypeError, ValueError) as exc:
            # UnicodeDecodeError is a ValueError; LookupError covers unknown
            # codec names and TypeError covers non-string encoding values.
            error = exc

    if throw and error is not None:
        raise error
    return None
