#!/usr/bin/env python
# -*- coding: utf-8 -*-
# $Id: W3layout.py 10244 2017-09-03 06:25:56Z Lavender $
#
# Copyright (c) 2015 Nuwa Information Co., Ltd, All Rights Reserved.
#
# Licensed under the Proprietary License,
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at our web site.
#
# See the License for the specific language governing permissions and
# limitations under the License.
#
# $Author: Lavender $
# $Date: 2017-09-03 14:25:56 +0800 (週日, 03 九月 2017) $
# $Revision: 10244 $

import scrapy

from Downloader.items import DownloaderItem as Item

class W3layoutSpider(scrapy.Spider):
    """Spider that crawls the w3layouts.com listing pages.

    Starting from https://w3layouts.com/page/1/, every link matched on a
    listing page is requested and handed to :meth:`parse_author`, and the
    pagination link labelled 'Next' is followed back into :meth:`parse`.
    One ``DownloaderItem`` is yielded per successfully parsed detail page.
    """

    name = "w3layout"

    def start_requests(self):
        """Yield the request for the first listing page."""
        urls = ['https://w3layouts.com/page/1/',]
        for url in urls:
            self.logger.info('-----Start download %s -----', url)
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Parse a listing page.

        Yields one request per detail link found in the content area, then
        one request for the next listing page when a 'Next' pagination
        link is present.
        """
        detail_links = response.css(
            '.content div div a::attr(href)').extract()
        for href in detail_links:
            # urljoin keeps absolute URLs untouched and resolves relative
            # ones, which scrapy.Request would otherwise reject.
            yield scrapy.Request(
                url=response.urljoin(href), callback=self.parse_author)

        for link in response.css('.pagination li a'):
            # Anchors without a text node (e.g. icon-only links) are
            # treated as having an empty label instead of raising.
            label = link.css('::text').extract_first(default='')
            if 'Next' not in label:
                continue
            next_href = link.css('::attr(href)').extract_first()
            if not next_href:
                continue
            next_url = response.urljoin(next_href)
            self.logger.info('-----Start download %s -----', next_url)
            yield scrapy.Request(url=next_url, callback=self.parse)

    def parse_author(self, response):
        """Parse a detail page and yield a populated item.

        The download id is taken from the ``?l=<id>`` query fragment of
        the third link in the ``.web-version-preview`` list; the first
        link of that list is used for the preview URL. Pages that do not
        provide these links are skipped with a warning and yield nothing.
        """
        preview_links = response.css(
            ".web-version-preview ul li a::attr(href)").extract()
        if len(preview_links) < 3:
            self.logger.warning(
                'Skipping %s: expected at least 3 preview links, found %d',
                response.url, len(preview_links))
            return

        # The id sits between "?l=" and the next "&" (or the end of the
        # URL when no further parameter follows).
        _, marker, tail = preview_links[2].partition("?l=")
        if not marker:
            self.logger.warning(
                'Skipping %s: no "?l=" parameter in download link %s',
                response.url, preview_links[2])
            return
        download_id = tail.split("&", 1)[0]

        item = Item()

        # file
        item['downloadUrl'] = (
            'http://d.w3layouts.com/downloadfree/%s' % download_id)

        # data
        item["slug"] = response.url.split("/")[-2]
        item['title'] = response.css(
            ".sub-title h1::text"
            ).extract_first(default=None)

        item["source"] = response.url
        item['previewUrl'] = 'https://w3layouts.com' + preview_links[0]

        license_text = response.css(
            ".post_data").re_first(r'License(.*)\<')
        if license_text is None:
            # Fallback text used when the page states no license.
            license_text = (
                'Life Time Free License Under Creative Commons '
                'Attribution 3.0 Unported. Unlimited Use, you can '
                'help & support us (W3Layouts, a Non-Profit) by '
                'donations or you should keep link to our website.')
        else:
            # Keep what follows the first colon; when the matched text
            # has no colon, keep it whole rather than failing.
            _, colon, after_colon = license_text.partition(":")
            if colon:
                license_text = after_colon
        item['license'] = license_text

        item['dateCreate'] = response.css(
            ".post_data").re_first(r'Date\sCreated(.*)\<')

        item['description'] = response.css(
            ".post_data p[style='text-align: center;'] + p::text"
            ).extract_first(default=None)

        # Raw markup of every .post_data element, concatenated.
        item['description2'] = ''.join(
            response.css(".post_data").extract())

        item['category'] = response.css(
            ".post_data footer p a[rel='tag']::text").extract()
        item['author'] = response.css(
            ".post_data footer span span a[rel='author']::text"
            ).extract_first(default=None)

        # image: preview images first, then images embedded in the post.
        image_urls = response.css(
            ".web-version-preview a img::attr(src)"
            ).extract()
        image_urls.extend(response.css(
            ".post_data img::attr(src)"
            ).extract())
        item['imageUrl'] = image_urls

        yield item
        