# Types for the extended information parsed
# from the default responses from Redwood's API.
from msgspec import Struct
from typing import Any

try:
    from typing import Self
except ImportError:
    from typing_extensions import Self
from .categories import (
    ACTIONS,
    CATEGORY_NAMES_SCORES,
    RatingName,
    RULE_NAMES_COUNT,
    NUMBER,
)
from .categories import RedwoodAction
from .redwood_api_types import (
    ApiError,
    ClassifyTally,
    ClassifyText,
    ClassifyUrl,
    ClassifyUrlAnalyze,
)
from ..settings import (
    MED_CONFIDENCE_SCORE_THRESHOLD,
    HIGH_CONFIDENCE_SCORE_THRESHOLD,
    PHRASE_SCORING_SKEWED_RATIO,
    PHRASE_SCORING_LOW_CONFIDENCE_RATIO,
    PHRASE_SCORING_MED_CONFIDENCE_RATIO,
    PHRASE_SCORING_HIGH_CONFIDENCE_RATIO,
    IDEAL_MAX_PHRASE_AVERAGE,
    SKEWED_SCORING_MULTIPLIER,
)


class ClassifierCategoryStat(Struct):
    """
    Classifier Category results analysis.

    Holds the overall score, rating and action for a single category,
    plus a score and a list of contributing rules for each rule type
    (domain, ip, phrase, regex).
    """

    score: NUMBER
    rating: str
    action: ACTIONS
    domain_score: NUMBER
    domain_rules: list[str]
    ip_score: NUMBER
    ip_rules: list[str]
    phrase_score: NUMBER
    phrase_rules: list[str]
    regex_score: NUMBER
    regex_rules: list[str]

    def append(self, field: str, value: Any) -> None:
        """
        Helper function to append a value to one of the list fields.

        ``field`` is the attribute name (e.g. ``"phrase_rules"``); an
        unknown name raises ``AttributeError`` from ``getattr``.
        """
        getattr(self, field).append(value)

    def increment(self, field: str, value: NUMBER) -> None:
        """
        Helper function to increment one of the
        field values by the amount specified.

        ``field`` is the attribute name (e.g. ``"phrase_score"``).
        """
        setattr(self, field, getattr(self, field) + value)

    def confidence(self) -> float:
        """
        Calculate confidence of the Classification Score this category achieved.

        Phrase-based scoring is preferred, followed by request-based rules.

        When no request-based (ip / regex / domain) score is present the
        result is delegated to ``phrase_confidence()``.
        """
        request_based_scores = (self.ip_score, self.regex_score, self.domain_score)
        # Number of request-based rule types that actually scored.
        scored_types = sum(1 for score in request_based_scores if score)

        if not scored_types:
            return self.phrase_confidence()

        # If both request & response scores included, we're satisfied
        if self.phrase_score:
            return PHRASE_SCORING_HIGH_CONFIDENCE_RATIO

        # More than one request-based rule type agreeing is the next best case.
        if scored_types > 1:
            return PHRASE_SCORING_MED_CONFIDENCE_RATIO

        return PHRASE_SCORING_LOW_CONFIDENCE_RATIO

    def phrase_confidence(self) -> float:
        """
        Phrase confidence is highest when the score
        is high and numerous rules contributed.

        Skewed scoring (see ``phrase_scoring_skewed``) takes precedence over
        the score thresholds; a zero phrase score yields ``0.0``.
        """
        if self.phrase_scoring_skewed():
            return PHRASE_SCORING_SKEWED_RATIO

        if self.phrase_score >= HIGH_CONFIDENCE_SCORE_THRESHOLD:
            return 1.0

        if self.phrase_score >= MED_CONFIDENCE_SCORE_THRESHOLD:
            return 0.75

        return 0.5 if self.phrase_score else 0.0

    def phrase_scoring_skewed(self) -> bool:
        """
        The more rules that contributed to the phrase the better. When the
        score is high and the phrase count low, we have reason to question
        the validity of the classifying.

        Returns ``True`` when the average score per phrase rule exceeds
        ``IDEAL_MAX_PHRASE_AVERAGE``; ``False`` when there are no phrase rules.
        """
        try:
            per_phrase_avg_score = self.phrase_score / len(self.phrase_rules)
        except ZeroDivisionError:
            # No phrase rules contributed, so there is nothing to be skewed.
            return False

        return per_phrase_avg_score > IDEAL_MAX_PHRASE_AVERAGE

    @classmethod
    def empty(cls) -> Self:
        """
        Return an empty ClassifierCategoryStat object,
        for use when an empty / "falsy" value is needed.

        Built through ``cls`` so the declared ``Self`` return type also
        holds when called on a subclass.
        """
        return cls(
            score=0,
            rating=RatingName.MISC,
            action=RedwoodAction.Ignore,  # type: ignore[arg-type]
            domain_score=0,
            domain_rules=[],
            ip_score=0,
            ip_rules=[],
            phrase_score=0,
            phrase_rules=[],
            regex_score=0,
            regex_rules=[],
        )


# Mapping of a string key to its ClassifierCategoryStat
# (presumably keyed by category code -- confirm against callers).
CategoryCodeStats = dict[str, ClassifierCategoryStat]


class TallyStat(Struct):
    """
    Tally statistics for a single rule type.
    """

    frq: int  # presumably total match frequency -- confirm against the populating code
    uniq: int  # presumably number of unique rules matched -- confirm
    rules: set[str]  # rule names collected for this rule type

    @classmethod
    def empty(cls) -> Self:
        """
        Return an empty TallyStat object,
        for use when an empty / "falsy" value is needed.

        Built through ``cls`` so the declared ``Self`` return type also
        holds when called on a subclass.
        """
        return cls(frq=0, uniq=0, rules=set())


class TallyStats(Struct):
    """
    One TallyStat per rule type: domain, ip, phrase and regex.
    """

    domain: TallyStat
    ip: TallyStat
    phrase: TallyStat
    regex: TallyStat

    @classmethod
    def empty(cls) -> Self:
        """
        Return an empty TallyStats object,
        for use when an empty / "falsy" value is needed.

        Every rule type gets its own fresh, empty TallyStat. Built through
        ``cls`` so the declared ``Self`` return type also holds when called
        on a subclass.
        """
        return cls(
            domain=TallyStat.empty(),
            ip=TallyStat.empty(),
            phrase=TallyStat.empty(),
            regex=TallyStat.empty(),
        )


class RatingStat(Struct):
    """
    Individual Category Rating stats for Classifier Categories.
    """

    total_score: NUMBER  # combined score of all categories with this rating
    phrase_score: NUMBER  # score of phrases from all categories with this rating
    phrase_count: NUMBER = 0  # number of phrases that produced the phrase score

    def scored_by_url_and_content(self) -> bool:
        """
        Rating was scored by both Phrases and URL rules.
        This is the best possible situation from a classifying perspective.

        True when there is a phrase score and it accounts for only part of
        the total score. Always returns a real ``bool``.
        """
        # bool() keeps the declared return type; a bare `and` would hand back
        # the falsy phrase_score itself (e.g. 0) instead of False.
        return bool(self.phrase_score) and self.phrase_score < self.total_score

    def phrase_ratio(self) -> float:
        """
        Phrase-based classifying is the most significant, so it's of interest
        to know how much of the total score resulted from phrase rules.

        Returns ``0.0`` when either the phrase score or the total score is
        zero; otherwise ``phrase_score / total_score``, multiplied by
        ``SKEWED_SCORING_MULTIPLIER`` when the phrase scoring is skewed.
        """
        # Both values must be non-zero: a zero phrase score means no ratio,
        # and a zero total score would otherwise divide by zero.
        if not self.phrase_score or not self.total_score:
            return 0.0

        ratio = self.phrase_score / self.total_score

        if self.phrase_scoring_skewed():
            return ratio * SKEWED_SCORING_MULTIPLIER

        return ratio

    def phrase_confidence(self, combined_phrase_score: NUMBER) -> float:
        """
        Rate of certainty that this rating is the most correct
        compared with ALL ratings found in the request.

        Compares based on phrase pattern types.

        ``combined_phrase_score`` is the divisor for this rating's phrase
        score; a zero divisor yields ``0.0``. Skewed scoring multiplies the
        result by ``SKEWED_SCORING_MULTIPLIER``.
        """
        try:
            combined_ratio = self.phrase_score / combined_phrase_score
        except ZeroDivisionError:
            return 0.0

        if self.phrase_scoring_skewed():
            return combined_ratio * SKEWED_SCORING_MULTIPLIER

        return combined_ratio

    def phrase_scoring_skewed(self) -> bool:
        """
        The more rules that contributed to the phrase the better. When the
        score is high and the phrase count low, we have reason to question
        the validity of the classifying.

        If scoring is skewed, we decrease the ratio by a multiplier.

        Returns ``True`` when the average score per phrase exceeds
        ``IDEAL_MAX_PHRASE_AVERAGE``; ``False`` when the phrase count is zero.
        """
        try:
            per_phrase_avg_score = self.phrase_score / self.phrase_count
        except ZeroDivisionError:
            # No phrases counted, so there is nothing to be skewed.
            return False

        return per_phrase_avg_score > IDEAL_MAX_PHRASE_AVERAGE

    def confidence(self, combined_score: NUMBER) -> float:
        """
        Rate of certainty that this rating is the most correct
        compared with ALL ratings found in the request.

        Compares based on ALL pattern types.

        Returns ``total_score / combined_score``, or ``0.0`` when
        ``combined_score`` is zero.
        """
        try:
            return self.total_score / combined_score
        except ZeroDivisionError:
            return 0.0

    @classmethod
    def empty(cls) -> Self:
        """
        Return an empty RatingStat object,
        for use when an empty / "falsy" value is needed.

        Built through ``cls`` so the declared ``Self`` return type also
        holds when called on a subclass.
        """
        return cls(total_score=0, phrase_score=0)


# Mapping of each RatingName to the RatingStat accumulated for it.
RatingCodeStats = dict[RatingName, RatingStat]


class ClassifyTallyResponse(ClassifyTally):
    """
    ClassifyTally response with extra keys added.

    All extra keys have defaults, so they may be omitted on construction.
    """

    categories: CATEGORY_NAMES_SCORES = {}
    total_score: NUMBER = 0
    total_phrase_score: NUMBER = 0
    rules: RULE_NAMES_COUNT = {}
    rule_types: RULE_NAMES_COUNT = {}
    stats: TallyStats | None = None
    classifierAnalysis: CategoryCodeStats = {}
    ratings: RatingCodeStats = {}

    @classmethod
    def empty(cls) -> Self:
        """
        Return an empty ClassifyTallyResponse object,
        for use when an empty / "falsy" value is needed.

        Unlike the field default (``None``), ``stats`` is populated with an
        empty TallyStats here. Built through ``cls`` so the declared ``Self``
        return type also holds when called on a subclass.
        """
        # NOTE(review): `scoreAnalysis` is not declared on this class;
        # presumably it is a field inherited from ClassifyTally -- confirm.
        return cls(
            categories={},
            total_score=0,
            rules={},
            rule_types={},
            stats=TallyStats.empty(),
            classifierAnalysis={},
            scoreAnalysis={},
            ratings={},
        )


class ClassifyUrlVerboseResponse(ClassifyUrl):
    """
    ClassifyUrlVerbose response with extra keys added.

    Extends ClassifyUrl with the fields below; each has a default, so
    they may be omitted on construction.
    """

    # NOTE(review): field meanings below are inferred from names -- confirm
    # against the code that populates them.
    total_score: NUMBER = 0  # presumably the combined score for the response
    rule_types: dict[str, NUMBER] = {}  # presumably a number per rule type name
    total_phrase_score: NUMBER = 0  # presumably the phrase-only part of the score


class ClassifyUrlAnalyzedResponse(ClassifyUrlAnalyze):
    """
    ClassifyUrlAnalyze response with extra keys added.

    Extends ClassifyUrlAnalyze with the fields below; each has a default,
    so they may be omitted on construction.
    """

    # NOTE(review): field meanings below are inferred from names -- confirm
    # against the code that populates them.
    total_score: NUMBER = 0  # presumably the combined score for the response
    total_phrase_score: NUMBER = 0  # presumably the phrase-only part of the score
    rules: dict[str, int] = {}  # an int per rule name (presumably a match count)
    rule_types: dict[str, NUMBER] = {}  # presumably a number per rule type name
    classifierAnalysis: CategoryCodeStats = {}  # ClassifierCategoryStat per string key
    ratings: RatingCodeStats = {}  # RatingStat per RatingName


# Union of every response type in the extended set: the extended
# response classes from this module plus ApiError and ClassifyText
# from redwood_api_types.
EXTENDED_RESPONSES = (
    ApiError
    | ClassifyTallyResponse
    | ClassifyUrlAnalyzedResponse
    | ClassifyUrlVerboseResponse
    | ClassifyText
)


# Union of the two extended responses that carry `classifierAnalysis`
# and `ratings` fields.
TALLY_RESPONSES = ClassifyTallyResponse | ClassifyUrlAnalyzedResponse

# Public API of this module. ClassifyText is imported from
# redwood_api_types and re-exported here; ClassifyTallyResponse is listed
# alongside the other extended response classes it is grouped with in
# EXTENDED_RESPONSES and TALLY_RESPONSES.
__all__ = (
    "ClassifierCategoryStat",
    "CategoryCodeStats",
    "ClassifyTallyResponse",
    "ClassifyUrlAnalyzedResponse",
    "ClassifyUrlVerboseResponse",
    "ClassifyText",
    "RatingStat",
    "RatingCodeStats",
    "TallyStat",
    "TallyStats",
    "EXTENDED_RESPONSES",
    "TALLY_RESPONSES",
)
