from invoke import Context, task
from textblob import TextBlob

from lcrequests import LogCabinHttpSession, Response
from lchttp import HTMLParser
from lchttp.settings import STOP_WORDS
from .stop_words import HTML_TAGS
from string import punctuation

# Extend the shared stop-word collection with HTML tag names (in place).
STOP_WORDS.update(HTML_TAGS)

# Translation table that maps every ASCII punctuation character to a space.
mt = str.maketrans(dict.fromkeys(punctuation, ' '))


@task
def parse(ctx: Context, url, count=False, nouns=False, all=False):
    """
    Download a page and print frequency reports for its visible text.

    :param ctx: Invoke context (unused here, required by ``@task``).
    :param url: Address of the page to download.
    :param count: Print the word-count report.
    :param nouns: Print the noun-phrase report.
    :param all: Print both reports.
    """
    response = download_page(url)
    if response is None:
        # download_page has already printed why the download failed;
        # there is no page text to analyze.
        return

    # NOTE(review): visible_text() is assumed to yield strings - confirm.
    visible = HTMLParser(response.text).visible_text()
    analyze('\n'.join(visible), count, nouns, all)


def download_page(url: str) -> 'Response | None':
    """
    Fetch ``url`` and return the response, or ``None`` on failure.

    ``http://`` is prepended when ``url`` carries no http(s) scheme.
    Failures (an exception while requesting, or any status code other
    than 200) are reported on stdout and signalled by returning ``None``.

    :param url: Page address, with or without an ``http(s)://`` prefix.
    :return: The response for a 200 reply, otherwise ``None``.
    """
    # Test for complete scheme prefixes: a bare 'http' prefix check would
    # treat hosts such as 'httpbin.org' as if they already had a scheme.
    if not url.lower().startswith(('http://', 'https://')):
        url = f'http://{url}'

    try:
        request = LogCabinHttpSession(url)
        response = request.get()
        if response.status_code != 200:
            print(f'Unable to retrieve URL. Status code {response.status_code}')
            return None
    except Exception as e:
        # Best-effort CLI helper: report the problem instead of crashing.
        print(e)
        print('An error occurred; Unable to retrieve URL')
        return None
    return response


def analyze(vt: str, count=False, nouns=False, all=False):
    """
    Print word and/or noun-phrase frequency reports for ``vt``.

    Both reports are ordered by ascending count, ties broken by the
    word/phrase itself.

    :param vt: Text to analyze.
    :param count: Print counts of words longer than four characters
        that are not in ``STOP_WORDS``.
    :param nouns: Print counts of multi-word noun phrases.
    :param all: Print both reports.
    """
    tb = TextBlob(vt)

    if count or all:
        print('Word counts:\n============')
        ordered_words = sorted(
            tb.word_counts.items(), key=lambda item: (item[1], item[0])
        )
        for word, total in ordered_words:
            # lower() must be *called*: the bound method object itself is
            # never a member of STOP_WORDS, so no word would be filtered.
            if word.lower() not in STOP_WORDS and len(word) > 4:
                print(f'{word}: {total}')

    if nouns or all:
        print('\nNoun phrase counts:\n===================')
        phrases = tb.noun_phrases
        # dict.fromkeys drops duplicates while keeping first-seen order;
        # only phrases made of more than one word are counted.
        phrase_counts = {
            phrase: phrases.count(phrase)
            for phrase in dict.fromkeys(phrases)
            if ' ' in phrase
        }
        ordered_phrases = sorted(
            phrase_counts.items(), key=lambda item: (item[1], item[0])
        )
        for phrase, total in ordered_phrases:
            cp = clean_phrase(str(phrase))
            # Stripping punctuation can shorten a phrase or collapse it to
            # a single word, so re-check after cleaning.
            if len(cp) > 6 and ' ' in cp:
                print(f'{cp}: {total}')


def clean_phrase(phrase: str) -> str:
    """
    Return ``phrase`` translated through ``mt`` with whitespace normalized.

    After translation, runs of whitespace collapse to a single space and
    leading/trailing whitespace is removed.
    """
    translated = phrase.translate(mt)
    tokens = translated.split()
    return ' '.join(tokens)


# Public API of this module: only the ``parse`` task is exported.
__all__ = ['parse']
