from system_env import setup_portal_environment

# Resolve the portal environment once, at import time. The path
# constants below are looked up by key on the object it returns.
PORTAL_ENV = setup_portal_environment()
PROJECT_ROOT = PORTAL_ENV["PROJECT_ROOT"]
REDWOOD_CATEGORY_DIR = PORTAL_ENV["REDWOOD_CATEGORY_DIR"]
# Directory path built by appending "/static/databases" to ROOT_HOME.
# NOTE(review): presumably where the GeoIP database files live -- confirm.
GEO_IP_DATABASE_DIR = f"{PORTAL_ENV['ROOT_HOME']}/static/databases"

# Base URL for the Redwood API: plain HTTP on the loopback address, port 6502.
REDWOOD_API = "http://127.0.0.1:6502"

# Minimum score a category needs before it is counted at all.
SCORE_THRESHOLD = 200

# Phrase scores are the preferred signal. Each confidence tier is a
# multiple of SCORE_THRESHOLD: the higher the phrase score, the more
# confident we are that the classification is correct.
LOW_CONFIDENCE_SCORE_THRESHOLD = 1.5 * SCORE_THRESHOLD
MED_CONFIDENCE_SCORE_THRESHOLD = 3 * SCORE_THRESHOLD
HIGH_CONFIDENCE_SCORE_THRESHOLD = 4 * SCORE_THRESHOLD

# A score built from many phrases is more trustworthy, so a lower
# average score per phrase means more certainty in the phrase-based
# classification.
IDEAL_MAX_PHRASE_AVERAGE = 75

# Phrase-scoring ratio levels, from skewed up to high confidence.
PHRASE_SCORING_SKEWED_RATIO = 0.4
PHRASE_SCORING_LOW_CONFIDENCE_RATIO = 0.5
PHRASE_SCORING_MED_CONFIDENCE_RATIO = 0.75
PHRASE_SCORING_HIGH_CONFIDENCE_RATIO = 1.0

# Applied to reduce our confidence in the ratio when a low
# phrase count has skewed the phrase scoring.
SKEWED_SCORING_MULTIPLIER = 0.75

# String prefix used for "lc_" category names.
# NOTE(review): what "lc" stands for is not visible in this file -- confirm.
LC_CAT_PREFIX = "lc_"
# Ordered field names for a Redwood log record.
# NOTE(review): presumably this matches the positional column order of the
# log line; confirm against the parser before reordering or inserting fields.
REDWOOD_LOG_FIELDS = (
    "ldate",
    "user",
    "action",
    "url",
    "method",
    "status",
    "content_type",
    "content_length",
    "modified",
    "tally",
    "scores",
    "conditions",
    "page_title",
    "ignored",
    "user_agent",
    "protocol",
    "referer",
    "platform",
    "header_filename",
    "virus",
    "rule_description",
    "origin_ip",
    "starlark_data",
)


# Ordered field names for an authentication log record.
# NOTE(review): presumably this matches the positional column order of the
# auth log line; confirm against the parser before reordering fields.
# NOTE(review): the record includes a "password" field -- confirm that
# consumers of these records do not persist or display it.
AUTH_LOG_FIELDS = (
    "date",
    "credential_status",
    "auth_type",
    "client_ip_address",
    "proxy_port",
    "username",
    "password",
    "platform",
    "network",
    "user_agent",
    "url",
    "message",
)


# Category names to prune. Kept as a frozenset so the collection is
# immutable and membership checks are constant-time.
PRUNE_CATEGORIES = frozenset({
    "js_content",
    "sslbypass",
    "sslbump",
    "phashsites",
    "wash_language",
    "yankheaders",
    "top",
    "top/10k",
    "top/100k",
    "top/million",
    "base_blanket_block",
})

# Categories left out when evaluating the correctness of
# Autofixes or Classification.
NON_SCORING_CATEGORIES = frozenset({
    "js_content",
    "sslbypass",
    "sslbump",
    "phashsites",
    "wash_language",
    "yankheaders",
    "masterwhitelist",
    "whitelists",
    "whitelistsall",
    "coronavirus",
    "businesstweaks",
    "populardomains",
})

# Stone categories that are allowed to be Level2
# even when phrase scoring is confident.
PERMIT_LEVEL_TWO_AUTOFIX = frozenset({
    "racing",
    "content_youtube",
    "socialnetworking",
    "travel_themeparks",
})


# Categories that have to be Level3
# even when phrase scoring is skewed.
REQUIRE_LEVEL_THREE_AUTOFIX = frozenset({
    "questionable",
    "videosearch",
    "auto_banned",
})

# Boulder categories leave the smallest margin for error when
# autofixing: they should be Level4 in every case except the
# very lowest scoring confidence.
AUTOFIXABLE_BOULDER_CATEGORIES = frozenset({
    "phishing",
    "malware",
    "gambling",
    "fashion",
})
