from collections import defaultdict
from redwoodctl.abbreviations import DOMAIN, IP, PHRASE, REGEX, BASIC_RULE_TYPES
from redwoodctl.typehints import (
    RatingStat,
    RatingCodeStats,
    RULE_TYPES,
    TallyStat,
    TallyStats,
    RULE_NAMES_COUNT,
    TALLY_RESPONSES,
)
from typing import cast
from ..settings import SCORE_THRESHOLD


def rule_type(rule: str) -> RULE_TYPES:
    """
    Classify a rule by its leading delimiter.

    A rule starting with "<" is a phrase, one starting with "/" is a
    regex and one starting with "ip:" is an IP rule. Any other rule
    is treated as a domain.
    """
    # Checked in order; the first matching prefix wins.
    prefixes = (("<", PHRASE), ("/", REGEX), ("ip:", IP))

    for prefix, kind in prefixes:
        if rule.startswith(prefix):
            return kind

    return DOMAIN


def calculate_tally_stats(tally: TALLY_RESPONSES) -> TallyStats | None:
    """
    Returns stats for the rules of a Tally, grouped by rule type
        - Unique count of each type of rule
        - Frequency count of each type of rule

    The rules are read from `tally.rules`, which is iterated as a
    mapping of rule to count. Each rule is classified with `rule_type`.
    """
    phrases: set[str] = set()
    regex: set[str] = set()
    domains: set[str] = set()
    ips: set[str] = set()
    phrase_count = regex_count = domains_count = ips_count = 0

    for rule, count in tally.rules.items():
        # Bind the rule type first and compare afterwards. Writing
        # `rt := rule_type(rule) == PHRASE` binds the boolean result of
        # the comparison, which sends every regex and IP rule to the
        # domain bucket.
        rt = rule_type(rule)

        if rt == PHRASE:
            phrases.add(rule)
            phrase_count += count
        elif rt == REGEX:
            regex.add(rule)
            regex_count += count
        elif rt == IP:
            ips.add(rule)
            ips_count += count
        else:
            domains.add(rule)
            domains_count += count

    return TallyStats(
        domain=TallyStat(frq=domains_count, uniq=len(domains), rules=domains),
        ip=TallyStat(frq=ips_count, uniq=len(ips), rules=ips),
        phrase=TallyStat(frq=phrase_count, uniq=len(phrases), rules=phrases),
        regex=TallyStat(frq=regex_count, uniq=len(regex), rules=regex),
    )


def add_rating_scores(tally: TALLY_RESPONSES) -> RatingCodeStats:
    """
    Aggregate the classifier scores of a Tally per rating.

    For every rating, the total score, the phrase score and the number
    of phrase rules of its qualifying categories are summed up.
    """
    totals: dict[str, RatingStat] = {}

    for category, stats in tally.classifierAnalysis.items():
        # Categories scoring below the minimum threshold would skew
        # the rating, so they are left out.
        if stats.score < SCORE_THRESHOLD:
            continue

        # Child categories (those with a "/" in their name) without
        # any phrase match are left out as well, since URL rules have
        # low classifying value.
        if not stats.phrase_rules and "/" in category:
            continue

        code = stats.rating
        current = totals.get(code)

        if current is None:
            # First category seen for this rating.
            totals[code] = RatingStat(
                total_score=stats.score,
                phrase_score=stats.phrase_score,
                phrase_count=len(stats.phrase_rules),
            )
        else:
            current.total_score += stats.score
            current.phrase_score += stats.phrase_score
            current.phrase_count += len(stats.phrase_rules)

    return cast(RatingCodeStats, dict(totals))


def collect_tally_rules(url_tally: TALLY_RESPONSES) -> RULE_NAMES_COUNT:
    """
    Count the rules listed for the categories of a URL Tally.

    The URL classifier doesn't have a `rules` key, so the rules are
    gathered from the `scoreAnalysis` entry of each category in
    `categories`. A rule is counted once per category that lists it.
    """
    analysis = url_tally.scoreAnalysis
    counts: RULE_NAMES_COUNT = {}

    for name in url_tally.categories:
        for pattern in analysis[name]:
            counts[pattern] = counts.get(pattern, 0) + 1

    return counts


def collect_rule_types(tally: TALLY_RESPONSES) -> RULE_NAMES_COUNT:
    """
    Sum the rule counts of the Tally per rule type.

    Every basic rule type is present in the result, with a count of
    zero when the Tally holds no rule of that type.
    """
    totals = dict.fromkeys(BASIC_RULE_TYPES, 0)

    for name, hits in tally.rules.items():
        totals[rule_type(name)] += hits

    return totals


# Public API of this module: the names exported by a star import.
__all__ = (
    "rule_type",
    "add_rating_scores",
    "calculate_tally_stats",
    "collect_tally_rules",
    "collect_rule_types",
)
