from string import punctuation
from .cache import (
    cached_property,
    HITCODE_CACHE,
    MIMETYPE_CACHE,
)
from .mimes import (
    EXTENSION_MIMETYPE_MAP,
    ASSET_TYPES,
    ASSET_SUBTYPES,
    API_TYPES,
    API_SUBTYPES,
    AUDIO_VIDEO_TYPES,
    AUDIO_VIDEO_SUBTYPES,
    BOOK_SUBTYPES,
    DOCUMENT_SUBTYPES,
    FILES_TYPES,
    FILES_SUBTYPES,
    PAGE_VISUALS_TYPES,
    PROGRAMS_SUBTYPES,

    MAIN_TYPES,
    STRUCTURED_SUFFIXES,
    MIMETYPE_MAP,
    SUBTYPE_MAP,
    SUBTYPE_TREE_PREFIXES,

    REQ_CODES,
    REQ_NAMES,
    REQ_SHORT_NAMES,
)


def strip_punctuation(name) -> str:
    """
    Trim wrapping junk characters from both ends of a mimetype string

    [image/jpeg] -> image/jpeg
    application/javascript: -> application/javascript
    """
    # only these characters are trimmed; anything inside the string is kept
    unwanted = '()[]:".'
    return name.strip(unwanted)


def strip_duplicate_prefixes(prefix, name):
    """
    Remove duplicate prefixes from the name string

    image/image/jpg -> image/jpg
    application/application/application/javascript -> application/javascript

    Every leading "<prefix>/" is dropped and exactly one is put back, so a
    name that doesn't start with the prefix at all comes back prefixed:

    jpg -> image/jpg

    Implemented as a loop rather than recursion: the input comes from
    servers, and a long run of repeated prefixes must not be able to
    exhaust the interpreter's recursion limit.
    """
    marker = f'{prefix}/'
    step = len(marker)

    # advance an offset past each repeated marker instead of re-slicing
    # the string on every repetition
    start = 0
    while name.startswith(marker, start):
        start += step

    return f'{marker}{name[start:]}'


def dedupe_multiple_csv(name):
    """
    Pick one mimetype out of a comma-separated list of them

    image/png,image/gif -> image/png

    The first entry that contains a slash and does not end with one wins.
    Values without a comma, or where no entry qualifies (image/,png),
    are returned exactly as they were passed in.
    """
    # no comma means there is nothing to choose between
    if ',' not in name:
        return name

    candidates = (part.strip() for part in name.split(','))
    usable = (c for c in candidates if '/' in c and not c.endswith('/'))
    return next(usable, name)


class Mimetype:
    """
    Process mimetypes to have as few variations as possible
    Don't want the clutter of:

    image/jpg
    image/jpeg
    image;jpg
    application/javascript
    application/x-javascript
    x-application/javascript

    Normalize the above to:
    image/jpg
    application/javascript

    The individual cleaning methods mutate `self.mimetype` in place and
    return it. `clean` runs them in order; `mtype` and `subtype` are only
    populated once `clean` (or `hit_code`, which uses it) has been read.
    """

    def __init__(self, mimetype):
        # value exactly as received; used as the MIMETYPE_CACHE key
        self.original = mimetype
        # working copy rewritten by the cleaning steps
        self.mimetype = mimetype

        # filled in while cleaning
        self.mtype = None
        self.subtype = None

    def __str__(self):
        return self.original

    def __repr__(self):
        return f'{self.__class__.__name__}({self.original})'

    def convert_to_slash(self) -> str:
        """
        If the type/subtype isn't correctly delimited with
        a slash, attempt to insert one.

        image:jpeg
        imagejpeg
        image'jpeg

        Only values that start with one of MAIN_TYPES are changed.
        """
        if '/' in self.mimetype:
            return self.mimetype

        for mt in MAIN_TYPES:
            if self.mimetype.startswith(mt):
                mtlen = len(mt)
                mtype, subtype = self.mimetype[:mtlen], self.mimetype[mtlen:]

                # mimetype consists only of maintype such as 'file' / 'application'
                if not subtype:
                    self.mimetype = f'{mtype}/{mtype}'
                # image:jpg - the wrong delimiter is the first character of
                # the remainder, so replace that one character with the slash
                elif subtype[0] in punctuation:
                    self.mimetype = f'{mtype}/{subtype[1:]}'
                else:
                    # imagejpeg
                    self.mimetype = f'{mtype}/{subtype}'

                break

        return self.mimetype

    def strip_prefixes(self) -> str:
        """
        handle garbage like x-image/<subtype>, i-world/<subtype>
        """
        if self.mimetype.startswith(('x-', 'i-')):
            self.mimetype = self.mimetype[2:]
        return self.mimetype

    def strip_punctuation(self) -> str:
        """
        handle garbage like [image/jpeg]
        """
        # the call below resolves to the module-level helper, not this method
        self.mimetype = strip_punctuation(self.mimetype)
        return self.mimetype

    def strip_csv(self) -> str:
        """
        Reduce a comma-separated list of mimetypes to a single entry
        """
        self.mimetype = dedupe_multiple_csv(self.mimetype)
        return self.mimetype

    def clean_maintype(self) -> str:
        """
        Confirm that maintypes are not duplicated

        image/image/jpg -> image/jpg

        The maintype has to be followed by a slash to count as a match, so
        a value that merely begins with the same letters (images/jpg) is
        left alone instead of getting another maintype put in front of it.
        """
        for mtype in MAIN_TYPES:
            if self.mimetype.startswith(f'{mtype}/'):
                self.mimetype = strip_duplicate_prefixes(mtype, self.mimetype)

        return self.mimetype

    def clean_subtype(self) -> str:
        """
        Remove incorrect punctuation after the slash
        image/.jpeg
        image//jpeg

        Remove unwanted subtype trees

        Sets `mtype` and `subtype`. A value without any slash is returned
        unchanged and leaves both attributes as they were.
        """
        # split on the first slash only, so a doubled slash ends up at the
        # start of the subtype where it can be stripped as punctuation
        mtype, slash, subtype = self.mimetype.partition('/')
        if not slash:
            return self.mimetype
        self.mtype = mtype

        subtype = self.strip_punctuation_separators(subtype)
        subtype = self.strip_subtype_trees(subtype)
        subtype = SUBTYPE_MAP.get(subtype, subtype)
        self.subtype = subtype

        self.mimetype = f'{self.mtype}/{subtype}'
        return self.mimetype

    def strip_subtype_trees(self, subtype: str) -> str:
        """
        Remove unwanted subtype tree prefixes such as vnd., x-, i-

        Only the first matching entry of SUBTYPE_TREE_PREFIXES is removed.
        """
        for prefix in SUBTYPE_TREE_PREFIXES:
            if subtype.startswith(prefix):
                return subtype[len(prefix):]
        return subtype

    def strip_punctuation_separators(self, subtype: str) -> str:
        """
        Remove incorrect punctuation after the slash

        image/.jpeg
        image//jpeg

        A single leading punctuation character is removed.
        """
        if subtype and subtype[0] in punctuation:
            return subtype[1:]
        return subtype

    def strip_mailcap(self) -> str:
        """
        Remove Mailcap string

        video/mpeg; xmpeg %s -> video/mpeg
        text/css; charset=utf8 -> text/css
        """
        semicolon_index = self.mimetype.find(';')

        if semicolon_index == -1:
            return self.mimetype

        # a semicolon in front of the slash is not treated as a mailcap
        # separator; with no slash at all (-1) any semicolon qualifies
        slash_index = self.mimetype.find('/')
        if semicolon_index > slash_index:
            self.mimetype = strip_punctuation(self.mimetype[:semicolon_index])

            # reclean subtype because it now doesn't include mailcap
            self.clean_subtype()

        return self.mimetype

    def strip_structured_suffix(self) -> str:
        """
        Strip off structured suffix if one exists. Support "+" and "-" delimiter

        `mtype` / `subtype` are refreshed from the shortened value so they
        match the string that `clean` returns and caches.
        """
        mimetype = self.mimetype
        for suffix in STRUCTURED_SUFFIXES:
            if mimetype.endswith((f'+{suffix}', f'-{suffix}')):
                # +1 drops the delimiter together with the suffix
                self.mimetype = mimetype[:-(len(suffix) + 1)]

                # keep the parts in sync; otherwise a cache miss would leave
                # the suffix in `subtype` while a cache hit would not
                mtype, slash, subtype = self.mimetype.partition('/')
                if slash:
                    self.mtype, self.subtype = mtype, subtype
                break
        return self.mimetype

    @cached_property
    def clean(self) -> str:
        """
        Servers return many malformed mimetypes so
        clean up common errors, as well as mimetype
        variations to standardize data a bit.

        Results are stored in MIMETYPE_CACHE keyed by the original string.
        """
        cached = MIMETYPE_CACHE.get(self.original, '')
        if cached:
            # split the same way clean_subtype does (first slash only)
            mtype, slash, subtype = cached.partition('/')
            if slash:
                self.mtype, self.subtype = mtype, subtype
            else:
                self.mtype, self.subtype = self.mimetype, ''
            return cached

        # preliminary cleaning
        self.strip_prefixes()
        self.strip_punctuation()
        self.strip_csv()

        # If in the MIMETYPE_MAP, no further cleaning required
        if self.mimetype in MIMETYPE_MAP:
            self.mimetype = MIMETYPE_MAP[self.mimetype]
            self.mtype, self.subtype = self.mimetype.split('/')
        else:
            self.convert_to_slash()
            self.clean_maintype()
            self.clean_subtype()
            self.strip_mailcap()
            self.strip_structured_suffix()

        MIMETYPE_CACHE[self.original] = self.mimetype
        return self.mimetype

    @cached_property
    def hit_code(self) -> int:
        """
        Get most likely class of activity that the URL hit represents.

        If we can't determine the mimetype, or if it's
        empty, consider it a "page".
        """
        # reading `clean` also fills in self.mtype / self.subtype
        mimetype = self.clean
        cached = HITCODE_CACHE.get(mimetype, '')
        if cached:
            return cached

        mtype = self.mtype
        subtype = self.subtype

        # if we know we're HTML, or if we couldn't determine
        # main type and subtype, return page
        if subtype == 'html' or (not mtype and not subtype):
            hc = REQ_CODES['page']

        # Check the fast & common stuff first
        elif mtype in ASSET_TYPES or subtype in ASSET_SUBTYPES:
            hc = REQ_CODES['pageassets']

        elif mtype in PAGE_VISUALS_TYPES:
            hc = REQ_CODES['pagevisuals']

        elif mtype in AUDIO_VIDEO_TYPES or subtype in AUDIO_VIDEO_SUBTYPES:
            hc = REQ_CODES['audiovideo']

        elif subtype in DOCUMENT_SUBTYPES:
            hc = REQ_CODES['documents']

        elif mtype in API_TYPES or subtype in API_SUBTYPES:
            hc = REQ_CODES['api']

        elif subtype in PROGRAMS_SUBTYPES:
            hc = REQ_CODES['programs']

        elif subtype in BOOK_SUBTYPES:
            hc = REQ_CODES['books']

        # Catchall. Check after other more specific matches are checked
        elif subtype in FILES_SUBTYPES or mtype in FILES_TYPES:
            hc = REQ_CODES['files']

        else:
            # when in doubt, always assign "page", which is most prominent
            hc = REQ_CODES['page']

        HITCODE_CACHE[mimetype] = hc

        return hc

    def request_name(self) -> str:
        """
        Look up the REQ_NAMES entry for this mimetype's hit code
        """
        return REQ_NAMES[self.hit_code]

    def request_short_name(self) -> str:
        """
        Look up the REQ_SHORT_NAMES entry for this mimetype's hit code
        """
        return REQ_SHORT_NAMES[self.hit_code]

    def is_tech_service(self) -> bool:
        """
        Request type pertains to background technical services
        such as scripting, encryption checks, etc. Probably not
        directly user-generated.

        Checked when Redwood wasn't able to determine a classifier score.
        """
        # subtype is only populated by cleaning, so make sure it has run;
        # otherwise the answer would depend on what was called beforehand
        self.clean
        return self.subtype in FILES_SUBTYPES


class Extension:
    """
    Resolve a URL path filename extension to its cleaned mimetype
    """

    def __init__(self, extension: str) -> None:
        # lowercase first, then drop surrounding dots and spaces
        lowered = extension.lower()
        self.extension = lowered.strip('. ')

    def __str__(self) -> str:
        return self.extension

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        return f'{class_name}({self.extension})'

    def mimetype(self) -> str:
        """
        Return the cleaned mimetype mapped to this extension in
        EXTENSION_MIMETYPE_MAP, or an empty string when the extension
        is empty or has no (non-empty) mapping.
        """
        if self.extension:
            mime = EXTENSION_MIMETYPE_MAP.get(self.extension, '')
            if mime:
                return Mimetype(mime).clean

        return ''
