from rapidfuzz import fuzz
from rapidfuzz.process import extractOne
from .tools import punctuation_to_spaces

# Common English filler words that are stripped from keywords in
# Media / Channel files.  The same words are also merged into STOP_WORDS
# further down in this module.
#
# NOTE(review): 'amoungst' and 'fify' look like misspellings of
# 'amongst' / 'fifty' ('fifty' itself is absent).  They are kept exactly
# as they were so that filtering behaviour does not change - confirm
# before correcting them.
KEYWORD_STOP_WORDS = {
    'a',
    'about',
    'above',
    'across',
    'after',
    'afterwards',
    'again',
    'against',
    'all',
    'almost',
    'alone',
    'along',
    'already',
    'also',
    'although',
    'always',
    'am',
    'among',
    'amongst',
    'amoungst',
    'amount',
    'an',
    'and',
    'another',
    'any',
    'anyhow',
    'anyone',
    'anything',
    'anyway',
    'anywhere',
    'are',
    'around',
    'as',
    'at',
    'back',
    'be',
    'became',
    'because',
    'become',
    'becomes',
    'becoming',
    'been',
    'before',
    'beforehand',
    'behind',
    'being',
    'below',
    'beside',
    'besides',
    'between',
    'beyond',
    'bill',
    'both',
    'bottom',
    'but',
    'by',
    'call',
    'can',
    'cannot',
    'cant',
    'co',
    'con',
    'could',
    'couldnt',
    'cry',
    'de',
    'describe',
    'detail',
    'do',
    'done',
    'down',
    'due',
    'during',
    'each',
    'eg',
    'eight',
    'either',
    'eleven',
    'else',
    'elsewhere',
    'empty',
    'enough',
    'etc',
    'even',
    'ever',
    'every',
    'everyone',
    'everything',
    'everywhere',
    'except',
    'few',
    'fifteen',
    'fify',
    'fill',
    'find',
    'fire',
    'first',
    'five',
    'for',
    'former',
    'formerly',
    'forty',
    'found',
    'four',
    'from',
    'front',
    'full',
    'further',
    'get',
    'give',
    'go',
    'had',
    'has',
    'hasnt',
    'have',
    'he',
    'hence',
    'her',
    'here',
    'hereafter',
    'hereby',
    'herein',
    'hereupon',
    'hers',
    'herself',
    'him',
    'himself',
    'his',
    'how',
    'however',
    'hundred',
    'ie',
    'if',
    'in',
    'inc',
    'indeed',
    'interest',
    'into',
    'is',
    'it',
    'its',
    'itself',
    'keep',
    'last',
    'latter',
    'latterly',
    'least',
    'less',
    'ltd',
    'made',
    'many',
    'may',
    'me',
    'meanwhile',
    'might',
    'mill',
    'mine',
    'more',
    'moreover',
    'most',
    'mostly',
    'move',
    'much',
    'must',
    'my',
    'myself',
    'name',
    'namely',
    'neither',
    'never',
    'nevertheless',
    'next',
    'nine',
    'no',
    'nobody',
    'none',
    'noone',
    'nor',
    'not',
    'nothing',
    'now',
    'nowhere',
    'of',
    'off',
    'often',
    'on',
    'once',
    'one',
    'only',
    'onto',
    'or',
    'other',
    'others',
    'otherwise',
    'our',
    'ours',
    'ourselves',
    'out',
    'over',
    'own',
    'part',
    'per',
    'perhaps',
    'please',
    'put',
    'rather',
    're',
    'same',
    'see',
    'seem',
    'seemed',
    'seeming',
    'seems',
    'serious',
    'several',
    'she',
    'should',
    'show',
    'side',
    'since',
    'sincere',
    'six',
    'sixty',
    'so',
    'some',
    'somehow',
    'someone',
    'something',
    'sometime',
    'sometimes',
    'somewhere',
    'still',
    'such',
    'system',
    'take',
    'ten',
    'than',
    'that',
    'the',
    'their',
    'them',
    'themselves',
    'then',
    'thence',
    'there',
    'thereafter',
    'thereby',
    'therefore',
    'therein',
    'thereupon',
    'these',
    'they',
    'thick',
    'thin',
    'third',
    'this',
    'those',
    'though',
    'three',
    'through',
    'throughout',
    'thru',
    'thus',
    'to',
    'together',
    'too',
    'top',
    'toward',
    'towards',
    'twelve',
    'twenty',
    'two',
    'un',
    'under',
    'until',
    'up',
    'upon',
    'us',
    'very',
    'via',
    'was',
    'we',
    'well',
    'were',
    'what',
    'whatever',
    'when',
    'whence',
    'whenever',
    'where',
    'whereafter',
    'whereas',
    'whereby',
    'wherein',
    'whereupon',
    'wherever',
    'whether',
    'which',
    'while',
    'whither',
    'who',
    'whoever',
    'whole',
    'whom',
    'whose',
    'why',
    'will',
    'with',
    'within',
    'without',
    'would',
    'yet',
    'you',
    'your',
    'yours',
    'yourself',
    'yourselves',
}

# Generic words that carry no distinguishing information in a record name.
_GENERIC_STOP_WORDS = {
    'accountability',
    'amish',
    'brotherhood',
    'brotherhoods',
    'church',
    'companies',
    'company',
    'conference',
    'conferences',
    'conservative',
    'family',
    'fellowship',
    'follower',
    'followers',
    'hours',
    'list',
    'lists',
    'llc',
    'ltd',
    'mennonite',
    'mennonites',
    'nation',
    'national',
    'nationwide',
    'secretaries',
    'secretary',
}

# Console-related terms that don't add distinctiveness.
_CONSOLE_STOP_WORDS = {
    'allow',
    'allowed',
    'always',
    'android',
    'app',
    'apps',
    'appstore',
    'blacklist',
    'blanketblock',
    'block',
    'blocked',
    'blocking',
    'blocks',
    'blue',
    'browser',
    'browsers',
    'bypass',
    'captive',
    'categories',
    'category',
    'communication',
    'communications',
    'computer',
    'computers',
    'computing',
    'custom',
    'default',
    'dep',
    'dev',
    'develop',
    'development',
    'device',
    'devices',
    'domain',
    'domains',
    'email',
    'filter',
    'filtering',
    'filters',
    'ios',
    'iphone',
    'iphones',
    'kindle',
    'linux',
    'logger',
    'logging',
    'network',
    'networking',
    'networks',
    'osx',
    'pattern',
    'patterns',
    'phone',
    'phones',
    'policies',
    'policy',
    'portal',
    'profile',
    'profiles',
    'program',
    'programmer',
    'programming',
    'server',
    'servers',
    'site',
    'sites',
    'ssl',
    'tablet',
    'tablets',
    'url',
    'urls',
    'whitelist',
    'windows',
}

# Full state names (plus the District of Columbia).
_STATE_NAME_STOP_WORDS = {
    'alabama',
    'alaska',
    'arizona',
    'arkansas',
    'california',
    'colorado',
    'connecticut',
    'delaware',
    'district of columbia',
    'florida',
    'georgia',
    'hawaii',
    'idaho',
    'illinois',
    'indiana',
    'iowa',
    'kansas',
    'kentucky',
    'louisiana',
    'maine',
    'maryland',
    'massachusetts',
    'michigan',
    'minnesota',
    'mississippi',
    'missouri',
    'montana',
    'nebraska',
    'nevada',
    'new hampshire',
    'new jersey',
    'new mexico',
    'new york',
    'north carolina',
    'north dakota',
    'ohio',
    'oklahoma',
    'oregon',
    'pennsylvania',
    'rhode island',
    'south carolina',
    'south dakota',
    'tennessee',
    'texas',
    'utah',
    'vermont',
    'virginia',
    'washington',
    'west virginia',
    'wisconsin',
    'wyoming',
}

# Two-letter abbreviations for the state names above.
_STATE_ABBREVIATION_STOP_WORDS = {
    'al',
    'ak',
    'az',
    'ar',
    'ca',
    'co',
    'ct',
    'de',
    'dc',
    'fl',
    'ga',
    'hi',
    'id',
    'il',
    'in',
    'ia',
    'ks',
    'ky',
    'la',
    'me',
    'md',
    'ma',
    'mi',
    'mn',
    'ms',
    'mo',
    'mt',
    'ne',
    'nv',
    'nh',
    'nj',
    'nm',
    'ny',
    'nc',
    'nd',
    'oh',
    'ok',
    'or',
    'pa',
    'ri',
    'sc',
    'sd',
    'tn',
    'tx',
    'ut',
    'vt',
    'va',
    'wa',
    'wv',
    'wi',
    'wy',
}

# Words removed from every name before comparison.  This has to remain a
# plain mutable ``set``: the module extends it in place with
# KEYWORD_STOP_WORDS after all the word tables have been defined.
STOP_WORDS = set().union(
    _GENERIC_STOP_WORDS,
    _CONSOLE_STOP_WORDS,
    _STATE_NAME_STOP_WORDS,
    _STATE_ABBREVIATION_STOP_WORDS,
)

# Extra words dropped from a comparison *target* only: they are too common
# to be allowed to count towards similarity with the original term.
TARGET_STOP_WORDS = set(
    """
    all blue custom day dinner drawbridge employee employees
    farm garage gold guest house orbit sales service
    ship shipping shop silver work workers works yard
    """.split()
)

# Every keyword stop word is also a general stop word (merged in place).
STOP_WORDS |= KEYWORD_STOP_WORDS


def get_similarity(name: str, target: str, cutoff_multiplier: float = 0.0) -> float:
    """
    Get fuzzy ratio between term and target keyword.

    Both strings are normalized with ``normalize_name`` (the target with the
    additional target stop words removed) and compared with
    ``rapidfuzz.fuzz.ratio``.  The minimum accepted score comes from
    ``score_cutoff`` and depends on the length of the normalized name.

    :param name: The string name of a record.
    :param target: The string to compare the name to.
    :param cutoff_multiplier: Increase the sensitivity of the result by specified percentage.
    :return: The similarity score, or ``0.0`` when either string is empty
        (before or after normalization) or the score is below the cutoff.
    """
    if not name or not target:
        return 0.0

    name = normalize_name(name, compare_target=False)
    target = normalize_name(target, compare_target=True)

    # Normalization can remove every word.  rapidfuzz scores two empty
    # strings as a perfect match, so two names made only of stop words
    # would otherwise be reported as identical.
    if not name or not target:
        return 0.0

    cutoff = score_cutoff(name, cutoff_multiplier)

    return fuzz.ratio(name, target, score_cutoff=cutoff)


def get_most_similar(
    name: str,
    targets: list[str],
    cutoff_multiplier: float = 0.0,
) -> tuple[str, float, int] | None:
    """
    Compare the name to the list of targets and return the
    most similar targets.

    :param name: The string name of a record.
    :param targets: The list of strings to compare the name to.
    :param cutoff_multiplier: Increase the sensitivity of the result by specified percentage.
    """
    name = normalize_name(name, compare_target=False)
    targets = [normalize_name(w, compare_target=True) for w in targets]

    if not name or not targets:
        return None

    cutoff = score_cutoff(name, cutoff_multiplier)

    return extractOne(name, targets, scorer=fuzz.ratio, score_cutoff=cutoff)


def normalize_name(
    name: str,
    compare_target: bool = False,
    target_stop_words: set | None = None,
) -> str:
    """
    Normalize a record name for comparison to other names.

    :param name: The string name of a record.
    :param compare_target: Is word is a comparison target that other words
        will be compared against? If so, extra stop words will be removed
        to ensure most desired comparison matching.
    :param target_stop_words: Set of words to remove from string if compare_target
    """
    name_words = [w.strip() for w in punctuation_to_spaces(name.lower()).split()]
    n_term = set(name_words) - STOP_WORDS

    if compare_target:
        targets = target_stop_words or TARGET_STOP_WORDS
        n_term = n_term - targets

    return ' '.join([w for w in name_words if w and len(w) > 1 and w in n_term]).title()


def score_cutoff(name: str, multiplier: float = 0.0) -> int:
    """
    Calculate the score cutoff based on the name length.

    Shorter names get a higher base cutoff (85 for fewer than 5 characters,
    falling to 45 for more than 25).  The result is clamped to ``0..100``:
    similarity scores cannot exceed 100, so a larger cutoff would reject
    even an identical string.

    :param name: The string name of a record.
    :param multiplier: The multiplier amount by which to increase the score
        cutoff, as a fraction of the base value (``0.1`` raises it by 10%).
    :return: The minimum score, between 0 and 100, for a match to be accepted.
    """
    # (exclusive upper bound on the name length, base cutoff)
    length_bands = (
        (5, 85),
        (10, 70),
        (15, 60),
        (21, 55),
        (26, 50),
    )

    name_length = len(name)
    cutoff = next(
        (band_cutoff for limit, band_cutoff in length_bands if name_length < limit),
        45,
    )

    if multiplier:
        cutoff = int(cutoff * (1 + multiplier))

    return max(0, min(100, cutoff))


def _as_affixes(affixes) -> tuple[str, ...]:
    """Return ``affixes`` as a tuple of non-empty strings (a bare string counts as one)."""
    if isinstance(affixes, str):
        affixes = (affixes,)
    return tuple(affix for affix in affixes if affix)


def _trim_uniquifier(name: str) -> str:
    """Drop trailing digits, then any leading / trailing ``_`` or ``-``."""
    return name.rstrip('0123456789').strip('_-')


def pretty_name(
    name: str,
    prefixes: tuple[str, ...] = (),
    suffixes: tuple[str, ...] = (),
    prettyify: bool = True,
) -> str:
    """
    Take config name and remove suffixes / uniquifiers
    and only display what's interesting to humans.

    Known prefixes and suffixes are removed repeatedly, prefixes first, and
    after every removal trailing digits and ``_`` / ``-`` separators are
    trimmed again.  Exactly the matched prefix or suffix text is removed.

    :param name: The raw config name. Anything that is not a string
        (for example ``None``) yields ``''``.
    :param prefixes: Prefixes to strip from the start of the name.
    :param suffixes: Suffixes to strip from the end of the name.
    :param prettyify: When true (the default) trim once more and replace
        the remaining underscores with spaces; when false return the
        stripped name with its underscores intact.
    :return: The human-readable name, or ``''`` if nothing is left.

    >>> pretty_name('Vivaldi_1_apg', suffixes=('_apg', '_tg'))
    'Vivaldi'

    >>> pretty_name('Lunch_5_tg', suffixes=('_apg', '_tg'))
    'Lunch'

    >>> pretty_name('lc_lunchtime_afternoon_break', prefixes=('lc_', 'af_'))
    'lunchtime afternoon break'

    >>> pretty_name('lc_af_lunchtime_afternoon_break', prefixes=('lc_', 'af_'))
    'lunchtime afternoon break'

    >>> pretty_name('Shipping_5')
    'Shipping'

    >>> pretty_name('darmar_tga_542', suffixes=('_apg', '_tga'))
    'darmar'

    >>> pretty_name('darmar_542_tga_apg', suffixes=('_apg', '_tga'))
    'darmar'

    >>> pretty_name('darmar_542_tga_345_apg', suffixes=('_apg', '_tga'))
    'darmar'
    """
    try:
        name = _trim_uniquifier(name.strip())
    except AttributeError:
        # Not a string (e.g. None): there is nothing to display.
        return ''

    prefixes = _as_affixes(prefixes)
    suffixes = _as_affixes(suffixes)

    # Each pass removes at least one character, so the loop always ends -
    # including when a prefix matches a name that has no underscore.
    while name:
        prefix = next((p for p in prefixes if name.startswith(p)), None)
        if prefix is not None:
            name = name[len(prefix) :]
        else:
            suffix = next((s for s in suffixes if name.endswith(s)), None)
            if suffix is None:
                break
            name = name[: -len(suffix)]

        name = _trim_uniquifier(name.strip())

    if not prettyify:
        return name

    # if there's a _<pk> suffix remaining, strip it
    return _trim_uniquifier(name).replace('_', ' ')


# Names exported by a star-import of this module.  normalize_name,
# score_cutoff, STOP_WORDS and TARGET_STOP_WORDS are not listed here.
__all__ = (
    'get_similarity',
    'get_most_similar',
    'pretty_name',
    'KEYWORD_STOP_WORDS',
)
