###############################################################################
# Local Security Check Automation Framework
#
# Authors:
# Veerendra GG <veerendragg@secpod.com>
#
# Revision 1.0
# Date: 2009/05/20
#
# Copyright:
# Copyright (c) 2009 SecPod , http://www.secpod.org
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2
# (or any later version), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
###############################################################################

import re
import os
import sys

from common import utils

## Base URL that Parser._getYearLinks() prepends to every advisory file
## name it finds, to build the advisory's download link.
append_url = 'http://www.gentoo.org/security/en/glsa/'

class Parser:
    """
    Gentoo security advisory (GLSA) parser.

    fetchHTML() downloads the advisory pages of one year into a local
    cache; parser() extracts the fields of a single advisory page and
    stores them in the attributes declared below.

    NOTE(review): fetchHTML() reads self.main_url and self.html_cache,
    which are not defined anywhere in this class -- presumably the caller
    or a subclass sets them before use; confirm.
    """

    ## Parse results, initialised to empty values.
    ## NOTE: the list/dict defaults are class attributes, hence shared by
    ## all instances until parser() rebinds them on the instance.
    AdvID = ''
    Description = ''
    Impact = ''
    Packages = {}
    CVEs = ''
    Name = ''
    Summary = ''
    Platforms = ''
    Product = []
    Html_content = ''
    XREF = []
    FileName = ''
    total_prod_list = []

    ## Literal list tags removed from description / impact text.
    _LIST_TAG_REGEX = re.compile(r'</?(?:li|ul)>')

    ## (HTML entity, range operator, debug label).  Order matters: the
    ## "&lt;=" / "&gt;=" entries must be tried before their "&lt;" / "&gt;"
    ## prefixes, otherwise the shorter entity would always win.
    _RANGE_OPERATORS = (
        ("&lt;=", "le", "Package less then (<) : "),
        ("&lt;", "lt", "Package less then (<) : "),
        ("&gt;=", "ge", "Package greater then equal (>=) : "),
        ("&gt;", "gt", "Package greater then equal (>) : "),
    )

    def _getYearLinks(self, link, year, debug=0):
        """
        Gets the advisory links for the given year.

        link  : URL of the advisory index page
        year  : year (or year prefix) used to select 'glsa-<year>...' links
        debug : print the collected links when true

        Returns a list of advisory URLs (append_url + file name), or an
        empty list when the page is empty or holds no matching link.
        """
        data = utils.getHTMLCon(link)
        if not data:
            return []

        ## [^"]* keeps the capture inside a single href value; a greedy
        ## '.*' would run up to the last '">' on the line.
        link_regex = r'<a href="http://security\.gentoo\.org/glsa/' + \
                     r'(glsa-' + str(year) + r'[^"]*)">'
        links = re.findall(link_regex, data)
        if not links:
            return []

        year_links = [append_url + name for name in utils.removeDups(links)]

        if debug and year_links:
            print("Total (%s) Gentoo Security Advisories for (%s)"
                  "  year: " % (len(year_links), year))
            print("\nGentoo Security Advisory for (%s) year" % (year,))
            for adv_link in year_links:
                print(adv_link)

        return year_links

    def fetchHTML(self, year, debug=0):
        """
        Retrieve the Gentoo advisories of the given year locally.

        Every advisory that is not yet present under self.html_cache is
        downloaded with utils.fetchFiles(); a failing download is reported
        and skipped.  The process exits when no advisory link is found or
        when an unexpected error occurs.
        """
        try:
            year_links = self._getYearLinks(self.main_url, year, debug)

            if not year_links:
                print("ERROR: Din't find Gentoo Security Advisories for " +
                      "(%s) year" % (year,))
                print("Exiting ...")
                sys.exit(0)

            ## Remove duplicate links, then construct the cache file name
            ## and retrieve each advisory that is not cached yet.
            for adv_url in utils.removeDups(year_links):
                base_name = adv_url.split('/')[-1]
                file_name = self.html_cache + \
                            base_name.replace('.xml', '.html')

                if os.path.isfile(file_name):
                    continue

                if debug:
                    print("\nFetching Gentoo Advisory..." +
                          os.path.basename(adv_url))
                try:
                    utils.fetchFiles(adv_url, file_name, debug)
                except Exception as fetch_err:
                    ## Best effort: report and go on with the next one.
                    print('ERROR: Error fething the url %s' % (fetch_err,))

        except Exception as msg:
            print("Exception in : gentoo -> Parser(Class) -> "
                  "fetchHTML method()")
            sys.exit(msg)

    def _findAll(self, regex):
        """
        Returns all matches of the compiled pattern 'regex' in
        self.Html_content.

        The second positional argument of a compiled pattern's findall()
        is the start offset, not a flag, so no flag is passed here; case
        sensitivity is decided by the flags 'regex' was compiled with.
        """
        return regex.findall(self.Html_content)

    def _flattenHtml(self, fragment, skip_markers):
        """
        Joins the lines of an HTML fragment into one text run.

        Lines containing any of 'skip_markers' are dropped, literal
        <li>, </li>, <ul> and </ul> tags are removed, every kept line is
        prefixed with two spaces and double quotes become ' &qt '.
        """
        text = ''
        for line in fragment.split('\n'):
            line = line.strip()
            if any(marker in line for marker in skip_markers):
                continue
            ## Remove the tags themselves; str.strip('<li>') would strip
            ## the characters '<', 'l', 'i', '>' and eat into the text.
            text += '  ' + self._LIST_TAG_REGEX.sub('', line)

        return text.replace('"', " &qt ")

    def getCVE(self, debug=0):
        """
        Returns the CVE/CAN identifiers found in the advisory as one
        string joined with '", "' ('' when none is found).
        """
        if debug:
            print("\nGetting CVE List...")

        cve_regex = re.compile('CVE-[0-9]+-[0-9]+')
        can_regex = re.compile('CAN-[0-9]+-[0-9]+')

        found = self._findAll(cve_regex)
        found.extend(self._findAll(can_regex))
        found = utils.removeDups(found)

        if found:
            cve_list = '", "'.join(found)
        else:
            cve_list = ''

        if debug:
            print("CVE List :  %s" % (cve_list,))

        return cve_list

    def getAdvID(self, debug=0):
        """
        Returns the Gentoo Security Advisory ID ('' when not found).
        """
        if debug:
            print("\nGetting Advisory ID...")

        adv_id_regex = re.compile(
            r'Advisory Reference.*\n.*(GLSA.\d+-\d+) /')
        adv_id = self._findAll(adv_id_regex)

        if not adv_id:
            return ''

        if debug:
            print("Advisory ID :  %s" % (adv_id,))

        return adv_id[0].strip()

    def getAffectedPackage(self, debug=0):
        """
        Returns the affected package text that follows the advisory ID
        on the 'Advisory Reference' entry ('' when not found).
        """
        if debug:
            print("\nGetting Affected Packages/RPM List...")

        pkg_regex = re.compile(
            r"Advisory Reference.*\n.*GLSA.\d+-\d+ /.?(.*)<")
        matches = self._findAll(pkg_regex)

        if matches:
            pkg = matches[0].strip()
        else:
            pkg = ''

        if debug:
            print("Affected Packages/RPMS :  %s" % (pkg,))

        return pkg

    def getDescription(self, debug=0):
        """
        Returns the vulnerability description: the text between the
        'Description<' and 'Impact<' markers, flattened and re-wrapped
        with utils.formatMultiLines ('' when the section is not found).
        """
        if debug:
            print("\nGetting Vulnerability Description...")

        desc_regex = re.compile("(?s)Description<(.*)Impact<", re.IGNORECASE)
        desc = self._findAll(desc_regex)
        if not desc:
            return ''

        ## Lines holding paragraph tags are dropped entirely.
        description = self._flattenHtml(desc[0].strip(), ('<p', 'p>'))

        if description:
            ## Format line which exceeds 79 character.
            description = utils.formatMultiLines(description)
            description = description.strip()

        return description

    def getImapct(self, debug=0):
        """
        Returns the vulnerability impact: the text starting at 'Impact<'
        inside the 'Description<' ... 'Resolution Information<' section,
        flattened and re-wrapped with utils.formatMultiLines ('' when the
        section is not found).

        The misspelled name is kept for existing callers; getImpact() is
        the correctly spelled equivalent.
        """
        impact_desc = ''

        if debug:
            print("\nGetting Vulnerability Impact...")

        impact_regex = re.compile("(?s)Description<(.*)Resolution " +
                                  "Information<", re.IGNORECASE)
        section = self._findAll(impact_regex)

        if section:
            impact = re.findall("(?s)Impact<.*", section[0].strip())
            if impact:
                ## Lines holding paragraph or span tags are dropped.
                impact_desc = self._flattenHtml(impact[0].strip(),
                                                ('<p', 'p>', 'span>'))

        if impact_desc:
            ## Format line which exceeds 79 character.
            impact_desc = utils.formatMultiLines(impact_desc)
            impact_desc = impact_desc.strip()
        elif debug:
            print("Warning: Impact Not found...")

        return impact_desc

    def getImpact(self, debug=0):
        """
        Correctly spelled alias of getImapct().
        """
        return self.getImapct(debug)

    def getAffectedProduct(self, debug=0):
        """
        Returns the first architecture phrase found in the advisory
        (matched case-insensitively).  When nothing matches, the empty
        match list is returned unchanged, so callers should only test
        the result for truth.
        """
        ## Different Architectures
        architectures = 'All supported architecture.?|Intel compatible|' + \
                        'AMD64|x86 ppc hppa'

        prod_regex = re.compile(architectures, re.IGNORECASE)
        products = self._findAll(prod_regex)

        if products:
            products = products[0].strip()

        if debug:
            print("\nAffected Product is/are : (%s)" % (products,))

        return products

    def _matchRangeOperator(self, line, debug=0):
        """
        Returns the range operator named by 'line' ('le', 'lt', 'ge',
        'gt', prefixed with 'r' when the line mentions 'revision'), or ''
        when the line holds no comparison entity.
        """
        for entity, operator, label in self._RANGE_OPERATORS:
            if entity in line:
                if debug:
                    print("%s %s" % (label, line))
                if "revision" in line:
                    return "r" + operator
                return operator
        return ''

    def getRPM(self, prod, debug=0):
        """
        Returns the OS package dictionary {prod: {package: [items]}}.

        The section between '>Architecture' and 'Related bugreport' is
        read line by line.  Each package maps to a flat list of range
        operators and version strings in page order, with 'break' added
        after a version line that closes its table cell, e.g.
        {'kde-base/kdelibs': ['lt', '4.0', 'rge', '3.5.8-r4', ...]}.
        An empty dict is returned when the section, the packages or
        'prod' are missing.
        """
        if debug:
            print("\nGetting RPM List...")

        os_pkg_dict = {}
        all_pkgs = {}

        pkg_sec = re.findall("(?s)>Architecture.*Related bugreport",
                             self.Html_content)
        if not pkg_sec:
            if debug:
                print("ERROR: Didn't find Package Section")
            return os_pkg_dict

        pkg = ''
        for line in pkg_sec[0].strip().split('\n'):
            line = line.strip()

            ## A table cell with attributes starts a new package; an empty
            ## cell resets the current package.
            cell = re.findall('<td.*">(.*)<', line)
            if cell:
                pkg = cell[0].strip()
                if pkg:
                    if debug:
                        print("Package Name :  %s" % (pkg,))
                    all_pkgs[pkg] = []
                continue

            ## Nothing can be recorded before a package name was seen.
            if not pkg:
                continue

            operator = self._matchRangeOperator(line, debug)
            if operator:
                all_pkgs[pkg].append(operator)
                continue

            if "=" in line:
                if debug:
                    print("Package equal (=) :  %s" % (line,))
                ## Only a bare '=' counts (strip() removes the characters
                ## of '</td>' from both ends).  A line equal to '=' can
                ## never mention 'revision', so only 'eq' is possible.
                if line.strip('</td>') == '=':
                    all_pkgs[pkg].append("eq")
                continue

            ## Remaining lines are version lines when they hold digits.
            if re.search('[0-9.]+', line):
                ver = re.findall('([0-9.]+.*)<', line)
                if not ver:
                    ver = re.findall('([0-9.]+.*),', line)
                if ver:
                    if debug:
                        print("Package Version :  %s" % (line,))
                    all_pkgs[pkg].append(ver[0])
                elif debug and 'AMD' not in line:
                    print("ERROR: Version not found :  %s" % (line,))

                if "</td>" in line:
                    all_pkgs[pkg].append("break")

        if debug:
            print("\nProduct :  %s" % (prod,))
            print("ALL Packges and versions are :  %s" % (all_pkgs,))

        ## Add the OS with packages.
        if all_pkgs and prod:
            os_pkg_dict[prod] = all_pkgs

        return os_pkg_dict

    def formatReference(self, main_url, file_name):
        """
        Constructs the advisory reference: main_url (with a trailing '/'
        added when missing) followed by file_name with '.html' replaced
        by '.xml'.
        """
        if not main_url.endswith('/'):
            main_url = main_url + '/'

        return main_url + file_name.replace('.html', '.xml')

    def _shortProductName(self, product):
        """
        Returns 'product' unchanged when it has at most 40 characters.
        Otherwise returns its first word or, failing that, its first
        comma separated item (whichever is shorter than 40 characters),
        followed by ' and other packages'.  When both are too long the
        first 40 characters are used instead.
        """
        if len(product) <= 40:
            return product

        first_word = product.split()[0]
        first_item = product.split(',')[0]
        if len(first_word) < 40:
            short = first_word
        elif len(first_item) < 40:
            short = first_item
        else:
            ## Neither candidate fits: truncate rather than fail.
            short = product[:40]

        return short.strip(',') + ' and other packages'

    def parser(self, html_content, debug=0):
        """
        Main parser function, builds the parser object by invoking the
        parse functions on 'html_content'.

        Returns True when all mandatory attributes (platform, packages,
        description, advisory ID, product) were found and False as soon
        as one of them is missing.  Any unexpected error is reported and
        ends the process through sys.exit().
        """
        try:
            if debug:
                print("Gentoo Parser Initiated...")

            self.Html_content = html_content.replace('\r\n', '\n')

            self.CVEs = self.getCVE(debug)

            self.Platforms = self.getAffectedProduct(debug)
            if not self.Platforms:
                if debug:
                    print("\nERROR: Required Products not found...")
                return False

            self.Packages = self.getRPM(self.Platforms, debug)
            if not self.Packages:
                if debug:
                    print("\nERROR: Required Packages not found...")
                return False

            self.Description = self.getDescription(debug)
            if not self.Description:
                if debug:
                    print("\nERROR: Description not found...")
                return False

            self.AdvID = self.getAdvID(debug)
            if not self.AdvID:
                if debug:
                    print("\nERROR: Advisory ID not found...")
                return False

            self.Product = self.getAffectedPackage(debug)
            if not self.Product:
                if debug:
                    print("\nERROR: Required Products not found...")
                return False

            product = self._shortProductName(self.Product)

            self.Summary = product

            self.Name = product + " " + self.AdvID

            self.Impact = self.getImapct(debug)

            ## Construct File Name
            self.FileName = "gentoo_" + self.AdvID.replace(' ', '_')\
                                                  .replace('-', '_')
            ## Set XREF
            (name, value) = self.AdvID.split()
            self.XREF = [name, value]

            if debug:
                print("\nAll mandatory attributes are parsed:  %s"
                      % (self.AdvID,))

            return True

        except Exception as msg:
            print('Exception in Parser gentoo -> Parser -> parser() Method '
                  ' %s' % (msg,))
            sys.exit(msg)
