import itertools
from dalmatian.settings import DALMATIAN_DOMAINS_DIR


def read_website_domains() -> set[str]:
    """
    Read domains from 'domains.urllist' in DALMATIAN_DOMAINS_DIR.

    Every line is stripped of surrounding whitespace. Blank lines, lines
    starting with '#' and lines starting with 'score ' are skipped, whether
    or not they are preceded by whitespace.

    Returns an empty set when the file does not exist.
    """
    try:
        with open(f'{DALMATIAN_DOMAINS_DIR}/domains.urllist', 'r') as domain_file:
            return {
                line.strip()
                for line in domain_file
                # The prefix test runs on the left-stripped line so that
                # indented '#' / 'score ' lines are skipped too, instead of
                # being returned as if they were domains.
                if line.strip() and not line.lstrip().startswith(('score ', '#'))
            }
    except FileNotFoundError:
        return set()


def read_website_tlds() -> set[str]:
    """
    Read TLDs from 'top_tlds.txt' in DALMATIAN_DOMAINS_DIR.

    Every line is stripped of surrounding whitespace. Blank lines are
    skipped, so the empty string is never reported as a TLD.

    Returns an empty set when the file does not exist.
    """
    try:
        with open(f'{DALMATIAN_DOMAINS_DIR}/top_tlds.txt', 'r') as tld_file:
            # Strip first, then drop entries that were blank lines.
            return {tld for tld in map(str.strip, tld_file) if tld}
    except FileNotFoundError:
        return set()


def top_short_tld_pairs(length: int = 3) -> list[tuple[str, str]]:
    """
    Return every ordered pair of short TLDs, such as:

    [('bbc', 'bbc'), ('bbc', 'cat'), ('bbc', 'ltd'), ('bbc', 'site'),...]

    The candidates are the TLDs returned by read_website_tlds() that are
    either exactly `length` characters long or listed in the retained set
    below. Pairs are the Cartesian product of the candidates with
    themselves, so a TLD paired with itself is included.

    `length` defaults to 3. The candidates are sorted before pairing so the
    result order is the same on every run.
    """
    popular_tlds = read_website_tlds()

    # Limiting pairs to TLDs of the requested length (plus the retained
    # ones below), to keep the number of variations manageable

    # Include TLDs since they're more likely to be used to fool users
    retain_tlds = {'cm', 'de', 'eu', 'ie', 'in', 'io', 'it', 'page', 'site', 'to', 'tv', 'uk'}

    # popular_tlds is a set, whose iteration order is not stable between
    # runs; sorting makes the generated pair list reproducible.
    short_tlds = sorted(
        tld for tld in popular_tlds if len(tld) == length or tld in retain_tlds
    )

    return list(itertools.product(short_tlds, repeat=2))
