#!/usr/bin/env python
# -*- coding: utf-8 -*-
# $Id: build_boilerplate.py 10303 2017-09-17 10:33:08Z Lavender $
#
# Copyright (c) 2017 Nuwa Information Co., Ltd, All Rights Reserved.
#
# Licensed under the Proprietary License,
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at our web site.
#
# See the License for the specific language governing permissions and
# limitations under the License.
#
# $Author: Lavender $
# $Date: 2017-09-17 18:33:08 +0800 (週日, 17 九月 2017) $
# $Revision: 10303 $

import os
import re
import hashlib
import json

from lxml import etree
from lxml.html import soupparser
from lxml.html.soupparser import parse
from django.core.management.base import BaseCommand
from django.conf import settings
from django.template.loader import get_template
from BeautifulSoup import (
        BeautifulSoup, Tag, Comment, ProcessingInstruction, NavigableString,
        Declaration, CData)
_DECLARATION_OR_DOCTYPE = Declaration

# lxml's mechanism for deciding which BeautifulSoup implementation to parse
# with is not well designed and is buggy, so the names soupparser uses are
# overwritten (monkey patched) with the BeautifulSoup 3 names imported above.
# NOTE: do not import this file from other modules; the patch below is applied
# as a side effect of importing it.
soupparser.BeautifulSoup = BeautifulSoup
soupparser.Tag = Tag
soupparser.Comment = Comment
soupparser.ProcessingInstruction = ProcessingInstruction
soupparser.NavigableString = NavigableString
soupparser.Declaration = Declaration
soupparser.CData = CData
soupparser._DECLARATION_OR_DOCTYPE = _DECLARATION_OR_DOCTYPE

from Zephyrus.boilerplate.management.commands import (
    TEMPLATES_DIR_PATH, STATIC_DIR_PATH, ROOT_DIR_PATH)
from Zephyrus.boilerplate.management.commands import createLogger

# Module-level logger obtained from the project's createLogger helper.
logger = createLogger(__name__, "BuildBoilerplate2.log")

# inside -----------------------------------------------------------------------
# Tags dispatched to buildInsidePlaceholder: the placeholder template tags are
# written inside the element, around its content.
# Paragraph / text tags
TEXT_TAGS = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'details', 'address',]

# Visual (inline formatting) tags
VISUAL_TAGS = [
    'em', 'strong', 'b', 'small', 'var', 'kbd', 'samp', 
    'pre', 'dfn', 'code', 'cite', 'del', 'ins', 'sub', 'sup', 'i',
]

ADD_PLACEHOLDER_TO_INSIDE_TAGS = ['span', 'button',] 

ALL_INSIDE_TAGS = TEXT_TAGS + VISUAL_TAGS + ADD_PLACEHOLDER_TO_INSIDE_TAGS

# outside ----------------------------------------------------------------------
# Tags dispatched to buildOutsidePlaceholder: the placeholder template tags
# are written around the element (before it and in its tail).
ADD_PLACEHOLDER_TO_OUTSIDE_TAGS = [
    'img', 'ul', 'a', 'table', 'ol', 'form', 'dl', 
    'blockquote', 'q', 'abbr', 'article', 
] 

ALL_OUTSIDE_TAGS = ADD_PLACEHOLDER_TO_OUTSIDE_TAGS

# block ------------------------------------------------------------------------
# Tags handled by markBlockTag / buildBlockTagPlaceholder.
BLOCK_TAGS = ['div',]

# special ----------------------------------------------------------------------
# Child tags that do not stop markBlockTag from marking their parent block.
SKIP_BLOCK_TAG = ['a', 'span',] + VISUAL_TAGS
# Outside tags that get a "CMS:<placeholder id>" comment inserted before them.
ADD_COMMENT_TO_PREVIOUS = ['img', 'a',]
# Descendant tags that dealWithJumpOffTags moves out to just after the
# element receiving a placeholder.
JUMP_OFF_TAG = ['script', 'style', 'link',]
# Each key is replaced by its md5 hex digest while the tree is processed
# (changeWordToMd5); afterwards the digest is replaced by the mapped value
# (changeMd5ToWord).
CHANGE_WORD = {
    '{': '&#123;',
    '}': '&#125;',
    '&#123;': '&#123;',
    '&#125;': '&#125;',
    '&amp;': '&',
}
# Tags temporarily rewritten to <div aaboilerplateorigintag="..."> by
# replaceExceptTags and turned back by fixExceptTags.
EXCEPT_TAGS = ['nav', 'section', ]
# Elements that indent() returns from without touching.
NO_INDENT = ['pre', 'code',]

# comment ----------------------------------------------------------------------
# Content of the <meta name="comment"> element added to <head>.
COMMENT = "The template was modified by Nuwainfo.com"
# Rendered at import time; inserted as a comment before the root element.
LEGEL = "\n%s\n" % get_template('boilerplate/txt/Legel.txt').render()

# cms --------------------------------------------------------------------------
# Template tags placed at the start / end of <head> by addCMSTags.
CMS_HEAD_FRONT_TAG = "{% load cms_tags menu_tags sekizai_tags staticfiles %}"
CMS_HEAD_BACK_TAG = "{% render_block 'css' %}"

# Template tags placed at the start / end of <body> by addCMSTags.
CMS_BODY_FRONT_TAG = "{% cms_toolbar %}"
CMS_BODY_BACK_TAG = "{% render_block 'js' %}"

# Text of the comment node inserted as the first child of <head>.
CMS_PAGE_ATTRS_COMMENT = '''
    CMS page attribute
    Slug: {% page_attribute "slug" %}
    Changed_date: {% page_attribute "changed_date" %}
    Changed_by: {% page_attribute "changed_by" %}
    '''

# Every tag name that buildPlaceholder may mark with boilerplateMark="T".
ALL_TAGS = ALL_INSIDE_TAGS + ALL_OUTSIDE_TAGS + BLOCK_TAGS

class Command(BaseCommand):

    help = 'Add placeholder to templates'
    args = '<destination folder>'

    def add_arguments(self, parser):
        parser.add_argument(
            '-d', '--debug', 
            action='store_false',
            help="Debug mode. Can see boilerplateMark in html.")

    def handle(self, *args, **options):
        """Rewrite every ``.html`` file below the target folder in place.

        The folder is ``args[0]`` when given, otherwise TEMPLATES_DIR_PATH.
        For each HTML file the steps are:

        1. preprocess the raw text (hash comments, disguise EXCEPT_TAGS),
        2. parse it, add placeholders and CMS tags, swap file paths for their
           md5 digests, re-indent and write the tree back,
        3. postprocess the raw text (digests -> static/template URLs, restore
           comments, CHANGE_WORD entries and EXCEPT_TAGS).

        Files whose name does not end with ``.html`` are left untouched.
        """
        if len(args) < 1:
            path = TEMPLATES_DIR_PATH
        else:
            path = args[0]

        # The option is declared with ``store_false``: this value is True
        # unless -d/--debug was passed, and a true value makes
        # buildPlaceholder strip the boilerplateMark attributes.
        stripMarks = options['debug']

        logger.info('Processing...')

        # static and html file md5 dict ----------------------------------------
        staticDict = self.buildFileDict('static')
        staticDict['js/FixTemplate.js'] = \
            hashlib.md5(b'js/FixTemplate.js').hexdigest()
        templateDict = self.buildFileDict('templates')
        fileDict = dict(staticDict)
        fileDict.update(templateDict)

        for root, dirs, files in os.walk(path):
            for name in files:
                # Filter before preprocessing: the preprocess steps rewrite
                # the file on disk, and only files that reach the endprocess
                # steps get restored.
                if not name.endswith(".html"):
                    continue

                htmlfile = os.path.join(root, name)
                logger.info(htmlfile)

                # preprocess ===================================================
                commentDict = self.findComment(htmlfile)
                self.replaceExceptTags(htmlfile)

                # process lxml etree ===========================================
                # The file is closed again before it is written back below.
                with open(htmlfile) as f:
                    page = parse(f, beautifulsoup=BeautifulSoup)

                # replace word
                self.changeWordToMd5(page)

                # build placeholder
                self.buildPlaceholder(page, name, debug=stripMarks)

                # deal with <head> and cms template tag
                self.addCMSTags(page)

                # deal with file path
                self.replaceKeyword(page, fileDict)

                # Indent from the last <html> element; fall back to the root
                # so a value from a previously processed file is never reused.
                html = page.getroot()
                for tag in page.iter(tag='html'):
                    html = tag
                self.indent(html)

                # write file
                page.write(htmlfile, method='html', pretty_print=True)

                # endprocess ===================================================
                self.fixStaticTemplate(htmlfile, fileDict)
                self.fixComment(htmlfile, commentDict)
                self.changeMd5ToWord(htmlfile)
                self.fixExceptTags(htmlfile)

                logger.info(htmlfile + " is OK!")

        logger.info('ALL OK!')

    #---------------------------------Main--------------------------------------
    def buildPlaceholder(self, eTree, filename, debug=False):
        """Mark the elements of a parsed page and add placeholders to them.

        eTree: parsed document; only descendants of its last <body> are
            marked and processed.
        filename: file name containing ``.html``; the part before it prefixes
            every placeholder id (``<name>-<tag>-<n>``, n counted per tag).
        debug: when true the temporary ``boilerplateMark`` attributes are
            removed again at the end; when false they stay in the tree.
        """
        def isComment(node):
            return 'Comment' in str(node.tag)

        baseName = filename[0:filename.index('.html')]

        # Every element starts out unmarked.
        for node in eTree.iter():
            if not isComment(node):
                node.set('boilerplateMark', 'F')

        for candidate in eTree.iter(tag='body'):
            body = candidate

        # markTags: decide which elements receive a placeholder.
        for node in body.iterdescendants():
            if isComment(node):
                continue
            if node.tag in BLOCK_TAGS:
                self.markBlockTag(node)
            elif node.tag in ALL_TAGS and not self.parentHasMarked(node):
                node.set('boilerplateMark', 'T')

        # buildPlaceholder: the walk stays lazy because the helpers below
        # move and insert nodes while it is in progress.
        counters = {}
        for node in body.iterdescendants():
            if isComment(node):
                continue

            tagName = node.tag
            serial = counters.get(tagName, 1)
            counters[tagName] = serial + 1
            placeholderId = "%s-%s-%d" % (baseName, tagName, serial)

            if tagName in BLOCK_TAGS:
                self.buildBlockTagPlaceholder(node, placeholderId)
            elif tagName in ALL_INSIDE_TAGS:
                self.buildInsidePlaceholder(node, placeholderId)
            elif tagName in ALL_OUTSIDE_TAGS:
                self.buildOutsidePlaceholder(node, placeholderId)

        if debug:
            for node in eTree.iter():
                if not isComment(node):
                    del node.attrib['boilerplateMark']

    def buildFileDict(self, path):
        fileDict = {}
        
        if path == 'static':
            for root, dirs, files in os.walk(STATIC_DIR_PATH):
                for f in files:
                    index = len(STATIC_DIR_PATH) + 1
                    filePath = os.path.join(root, f)[index:].replace('\\', '/')
                    fileDict[filePath] = hashlib.md5(filePath).hexdigest()
        elif path == 'templates':
            for root, dirs, files in os.walk(TEMPLATES_DIR_PATH):
                for f in files:
                    index = len(TEMPLATES_DIR_PATH) + 1
                    filePath = os.path.join(root, f)[index:].replace('\\', '/')
                    fileDict[filePath] = hashlib.md5(filePath).hexdigest()
        else:
            for root, dirs, files in os.walk(path):
                for f in files:
                    index = len(path) + 1
                    filePath = os.path.join(root, f)[index:].replace('\\', '/')
        return fileDict



    #----------------------------lxml etree ele---------------------------------
    def indent(self, elem, level=0, indentNum=2):
        """Re-indent an element tree in place via its text/tail whitespace.

        elem: element to indent; elements whose tag is in NO_INDENT are
            returned from immediately, leaving them and their content as is.
        level: nesting depth of ``elem``.
        indentNum: number of spaces per level; it is passed on to the
            recursive calls so nested levels use the same width.
        """
        if elem.tag in NO_INDENT:
            return

        pad = indentNum * " "
        i = "\n" + level * pad
        inner = i + pad

        if len(elem):
            if not elem.text or not elem.text.strip():
                elem.text = inner
            else:
                elem.text = inner + elem.text.strip() + inner

            for e in elem:
                self.indent(e, level + 1, indentNum)
                if not e.tail or not e.tail.strip():
                    e.tail = inner
                else:
                    e.tail = inner + e.tail.strip() + inner

            # The tail of the last child ends one level up, so that the
            # closing tag of ``elem`` lines up with its opening tag.
            if not e.tail or not e.tail.strip():
                e.tail = i
            else:
                e.tail = inner + e.tail.strip() + i
        else:
            # A missing tail is treated like an empty one; without this a
            # childless element at level 0 with tail None raised
            # AttributeError.
            tail = (elem.tail or '').strip()
            if level and not tail:
                elem.tail = i
            else:
                elem.tail = inner + tail + i


    def parentHasMarked(self, ele):
        """Return True when any ancestor of ``ele`` has boilerplateMark="T"."""
        return any(
            ancestor.get('boilerplateMark') == 'T'
            for ancestor in ele.iterancestors())

    def dealWithJumpOffTags(self, ele):
        """Move JUMP_OFF_TAG descendants of ``ele`` to just after ``ele``.

        The moved elements keep their document order. Text that followed a
        moved element (its tail) is left where it was, attached to the
        previous sibling or to the parent's text, instead of travelling out
        of ``ele`` together with the moved element.
        """
        # Collect first so the tree is not modified while it is being walked.
        jumpOff = [
            child for child in ele.iterdescendants()
            if child.tag in JUMP_OFF_TAG]

        for child in jumpOff:
            parent = child.getparent()
            if child.tail:
                # lxml's remove() takes the tail along with the element, so
                # hand the tail over to the preceding node beforehand.
                previous = child.getprevious()
                if previous is not None:
                    previous.tail = (previous.tail or '') + child.tail
                else:
                    parent.text = (parent.text or '') + child.tail
                child.tail = None
            parent.remove(child)

        # addnext() inserts directly after ``ele``, hence the reversed order.
        for tag in reversed(jumpOff):
            ele.addnext(tag)

    def buildInsidePlaceholder(self, ele, placeholderId):
        """Wrap the content of a marked element in a placeholder block.

        Nothing happens unless ``ele`` has boilerplateMark="T". Otherwise
        JUMP_OFF_TAG descendants are moved out first, the opening template
        tag is put in front of the element's text and the closing tag after
        its last child (or after the text when there are no children).
        """
        if ele.get('boilerplateMark') != 'T':
            return

        self.dealWithJumpOffTags(ele)

        opening = "{% placeholder " + "'%s' " % placeholderId + "or %}"
        closing = "{% endplaceholder %}"
        leadingText = ele.text or ''

        children = ele.getchildren()
        if children:
            lastChild = children[-1]
            lastChild.tail = (lastChild.tail or '') + closing
            ele.text = opening + leadingText
        else:
            ele.text = opening + leadingText + closing

    def buildOutsidePlaceholder(self, ele, placeholderId):
        """Wrap a marked element itself in a placeholder block.

        Nothing happens unless ``ele`` has boilerplateMark="T". Otherwise
        JUMP_OFF_TAG descendants are moved out first, the closing template
        tag is prepended to the element's tail and the opening tag is
        appended to whatever text precedes the element. Tags listed in
        ADD_COMMENT_TO_PREVIOUS additionally get a ``CMS:<id>`` comment
        inserted in front of them.
        """
        if ele.get('boilerplateMark') != 'T':
            return

        self.dealWithJumpOffTags(ele)

        ele.tail = "{% endplaceholder %}" + (ele.tail or '')

        if ele.tag in ADD_COMMENT_TO_PREVIOUS:
            ele.addprevious(etree.Comment(text="CMS:%s" % placeholderId))

        opening = "{% placeholder " + "'%s' " % placeholderId + "or %}"

        sibling = ele.getprevious()
        if sibling is not None:
            # Preceding text lives in the previous sibling's tail.
            sibling.tail = (sibling.tail or '') + opening
        else:
            # First child: preceding text lives in the parent's text.
            holder = ele.getparent()
            holder.text = (holder.text or '') + opening
        

    def markBlockTag(self, ele):
        """Set boilerplateMark="T" on a block element holding inline content.

        The element is left alone as soon as one of its children is neither
        a comment nor listed in SKIP_BLOCK_TAG. Otherwise it is marked when
        its own leading text is not blank. As a side effect a missing text
        is set to '' when at least one child has a tail.
        """
        sawTail = False
        for node in ele.getchildren():
            if node.tail is not None:
                sawTail = True

            isComment = 'Comment' in str(node.tag)
            if not isComment and node.tag not in SKIP_BLOCK_TAG:
                return

        if not (ele.text or sawTail):
            return

        if not ele.text:
            ele.text = ''
        if ele.text.strip() != '':
            ele.set('boilerplateMark', 'T')

    def buildBlockTagPlaceholder(self, ele, placeholderId):
        """Add placeholders to a block element.

        A marked block (boilerplateMark="T") is handled like an inside tag.
        For an unmarked block every non-blank piece of loose text -- the
        leading text and each child's tail -- gets its own placeholder named
        ``<placeholderId>-text<n>``; n is 1 for the leading text and goes up
        by one for each wrapped tail.
        """
        if ele.get('boilerplateMark') == 'T':
            self.buildInsidePlaceholder(ele, placeholderId)
            return

        def wrap(text, number):
            return (
                "{% placeholder " +
                "'%s-text%d' " % (placeholderId, number) +
                "or %}" + text + "{% endplaceholder %}")

        counter = 1
        if ele.text is not None and ele.text.strip() != '':
            ele.text = wrap(ele.text, counter)

        for node in ele.getchildren():
            tail = node.tail
            if tail is None or tail.strip() == '':
                continue
            counter += 1
            node.tail = wrap(tail, counter)

    def addCMSTags(self, page):
        """Insert the CMS template tags and bookkeeping nodes into a page.

        * <title> text and the content of <meta name="description"> are
          replaced by ``page_attribute`` template tags (the elements are
          created when missing); their previous values are stored as JSON in
          a ``CMS_PAGE_DATA...END_CMS_PAGE_DATA`` comment.
        * <head> receives, in this order at its start: the page attribute
          comment, the page data comment, the jQuery and FixTemplate script
          elements and the ``comment`` meta element; CMS_HEAD_FRONT_TAG is
          put in front of its text and CMS_HEAD_BACK_TAG after its last child.
        * <body> gets CMS_BODY_FRONT_TAG at its start and CMS_BODY_BACK_TAG
          at its end; a body without leading text (``<body><div>``) is
          handled as well.
        * When ROOT_DIR_PATH/info.json exists, the analytics comment and the
          tracking include marker are appended to <body>.
        * The LEGEL text is added as a comment in front of the root element.
        """
        for tag in page.iter(tag='head'):
            head = tag
        for tag in page.iter(tag='body'):
            body = tag

        # create page data
        titleContent = None
        createTitle = True
        for tag in page.iter(tag='title'):
            titleContent = tag.text
            tag.text = "{% page_attribute 'page_title' %}"
            createTitle = False

        description = None
        createMetaDescription = True
        for tag in page.iter(tag='meta'):
            if tag.get('name') == 'description':
                description = tag.get('content')
                tag.set(
                    "content", "{% page_attribute 'meta_description' %}")
                createMetaDescription = False

        data = {
            "title": titleContent,
            "description": description,
        }
        # NOTE(review): etree.Comment presumably rejects text containing
        # '--', so such a title/description may fail here -- confirm.
        cmsPageData = ("CMS_PAGE_DATA%sEND_CMS_PAGE_DATA" %
                       json.dumps(data))

        # create ele
        pgAttr = etree.Comment(text=CMS_PAGE_ATTRS_COMMENT)
        pgAttr.tail = '\n'

        cmsData = etree.Comment(text=cmsPageData)
        cmsData.tail = '\n'

        comment = etree.Element('meta')
        comment.set("name", "comment")
        comment.set("content", COMMENT)
        comment.tail = '\n'

        jq = etree.Element('script')
        jq.set("src",
            "https://ajax.googleapis.com/ajax/libs/jquery/3.1.1/jquery.min.js")
        jq.tail = '\n'

        addJ = etree.Element('script')
        addJ.set('src', 'js/FixTemplate.js')
        addJ.tail = '\n'

        metaDescription = etree.Element('meta')
        metaDescription.set("name", "description")
        metaDescription.set(
            "content", "{% page_attribute 'meta_description' %}")
        metaDescription.tail = '\n'

        title = etree.Element('title')
        title.text = "{% page_attribute 'page_title' %}"
        title.tail = '\n'

        legel = etree.Comment(text=LEGEL)
        legel.tail = '\n'

        # add cms data to head; each insert(0, ...) pushes the earlier ones
        # down, so the last insert ends up first.
        if createTitle:
            head.insert(0, title)
        if createMetaDescription:
            head.insert(0, metaDescription)
        head.insert(0, comment)
        head.insert(0, addJ)
        head.insert(0, jq)
        head.insert(0, cmsData)
        head.insert(0, pgAttr)

        head[-1].tail = '\n' + CMS_HEAD_BACK_TAG + '\n'

        if head.text is None:
            head.text = ''
        head.text = '\n' + CMS_HEAD_FRONT_TAG + head.text

        # add cms data to body; body.text is None when the body starts
        # directly with an element.
        bodyText = body.text or ''
        if body.getchildren():
            body[-1].tail = (body[-1].tail or '') + CMS_BODY_BACK_TAG + '\n'
            body.text = '\n' + CMS_BODY_FRONT_TAG + bodyText
        else:
            body.text = CMS_BODY_FRONT_TAG + bodyText + CMS_BODY_BACK_TAG

        # google analytics
        infoPath = os.path.join(ROOT_DIR_PATH, 'info.json')
        if os.path.isfile(infoPath):
            with open(infoPath) as jsonfile:
                info = json.load(jsonfile)
            # The value is not used below; the lookup is kept so that an
            # info.json without 'upc' still raises KeyError as before.
            upc = info['upc']

            # comment
            analyticsComment = etree.Comment(
                text=get_template(
                    'boilerplate/txt/AnalyticsComment.txt').render())
            includeHtml = etree.Comment("### include _tracking.html ###")

            body.append(analyticsComment)
            body.append(includeHtml)

        # add header legel
        html = page.getroot()
        html.addprevious(legel)

    def replaceKeyword(self, page, fileDict):
        for tag in page.iter():
            if tag.tag == 'script' or tag.tag == 'style':
                if tag.text:  
                    for path in fileDict:
                        result = os.path.splitext(path)[-1]
                        if not "." in result:
                            continue

                        if ("./" + path) in tag.text:
                            tag.text = tag.text.replace(
                                ("./" + path), fileDict[path])
                        if path in tag.text:
                            tag.text = tag.text.replace(path, fileDict[path])

            for name, value in tag.items():
                for path in fileDict:
                    value = value.replace('\\', '/')
                    # 考慮到有時 attr 的 value 可能不只 static 路徑，
                    # 像是attr="return func('static')"等等，所以才用現在的方式

                    # 因為上述原因，會導致有些會有以下狀況
                    # "./images/g2.jpg"" 會變 "./{% static 'images/g2.jpg' %}"
                    # 所有暫且先這樣
                    if ("./" + path) in value or value == ("./" + path):
                        newValue = value.replace(("./" + path), fileDict[path])
                        tag.set(name, newValue)
                        value = newValue
                    if path in value:
                        if fileDict[path] in value:
                            continue
                        newValue = value.replace(path, fileDict[path])
                        tag.set(name, newValue)
                        
    def changeWordToMd5(self, page):
        """Replace every CHANGE_WORD key in text and tails by its md5 digest.

        Elements whose tag is ``script`` or ``style`` are skipped.
        """
        def mask(value):
            for word in CHANGE_WORD:
                value = value.replace(word, hashlib.md5(word).hexdigest())
            return value

        for node in page.iter():
            if node.tag in ['script', 'style',]:
                continue
            if node.text:
                node.text = mask(node.text)
            if node.tail:
                node.tail = mask(node.tail)

    #--------------------------------file---------------------------------------
    def findComment(self, filePath):
        commentDict = {}
        with open(filePath, 'r') as f:
            content = f.read()
            commentList = re.findall(r"\<!--((.|\n)*?)--\>", content)
            
            for comment in commentList:
                code = hashlib.md5(comment[0]).hexdigest()
                content = content.replace(
                    "<!--" + comment[0] + "-->", "<!--" + code + "-->")
                commentDict[code] = comment[0]
            
        with open(filePath, 'w') as f:
            f.write(content)
        return commentDict

    def fixComment(self, filePath, commentDict):
        with open(filePath, 'r') as f:
            content = f.read()
            for key in commentDict:
                content = content.replace(key, commentDict[key])
        with open(filePath, 'w') as f:
            f.write(content)

    def fixStaticTemplate(self, filePath, fileDict):
        with open(filePath, 'r') as f:
            content = f.read()

            for path in fileDict:
                if '.html' in path:
                    if path == 'index.html' or path == 'Index.html':
                        content = content.replace(fileDict[path], '/')
                    index = path.index(".html")
                    href = path[0:index]
                    templatePath = "/" + href
                    content = content.replace(fileDict[path], templatePath)
                else:
                    staticPath = "{% static '" + path + "' %}"
                    content = content.replace(fileDict[path], staticPath)

        with open(filePath, 'w') as f:
            f.write(content)
            
    def changeMd5ToWord(self, filePath):
        """Replace the md5 digest of each CHANGE_WORD key by the mapped value.

        The file is rewritten in place.
        """
        with open(filePath, 'r') as source:
            text = source.read()

        for word, replacement in CHANGE_WORD.items():
            text = text.replace(hashlib.md5(word).hexdigest(), replacement)

        with open(filePath, 'w') as target:
            target.write(text)

    def replaceExceptTags(self, filePath):
        """Disguise every EXCEPT_TAGS element in a file as a <div>.

        ``<tag`` becomes ``<div aaboilerplateorigintag="tag" `` and
        ``</tag>`` becomes ``</div>`` followed by a marker comment, so that
        fixExceptTags can turn them back. The file is rewritten in place.
        """
        with open(filePath, 'r') as source:
            html = source.read()

        for name in EXCEPT_TAGS:
            # The "aa" prefix is there so the attribute sorts first.
            opening = "<div aaboilerplateorigintag=\"%s\" " % name
            closing = "</div><!--aaboilerplateorigintag=\"%s\"-->" % name
            html = html.replace('<%s' % name, opening)
            html = html.replace('</%s>' % name, closing)

        with open(filePath, 'w') as target:
            target.write(html)

    def fixExceptTags(self, filePath):
        """Turn the <div> stand-ins created by replaceExceptTags back.

        ``div aaboilerplateorigintag="tag"`` becomes ``tag`` again and each
        ``</div>`` followed by the matching marker comment becomes
        ``</tag>``. Any amount of whitespace (including none) may sit between
        ``</div>`` and the marker, so the closing tag is also restored when
        the two are adjacent on one line. The file is rewritten in place.
        """
        with open(filePath, 'r') as f:
            content = f.read()

        for tag in EXCEPT_TAGS:
            content = content.replace(
                "div aaboilerplateorigintag=\"%s\"" % tag, '%s' % tag)
            content = re.sub(
                r"</div>\s*<!--aaboilerplateorigintag=\"%s\"-->" % tag,
                '</%s>' % tag,
                content)

        with open(filePath, 'w') as f:
            f.write(content)
