from lchttp.uri import parse_query_string
from urllib.parse import urlparse


def pagination_params(url: str) -> dict:
    """
    Extract the pagination-related query params from a URL

    Only the keys ``cursor``, ``limit``, ``offset``, ``page`` and
    ``page_size`` are considered, and a key is returned only when the
    parsed query string holds a truthy value for it.

    https://draw.bridge/api/accounts/company/?limit=50&offset=950

    :param url: full URL whose query string should be inspected
    :return: dict mapping each pagination key found to its parsed value
    """
    query = parse_query_string(urlparse(url).query)
    wanted = ('cursor', 'limit', 'offset', 'page', 'page_size')

    # Look each key up exactly once, then drop the missing / empty ones
    found = ((name, query.get(name)) for name in wanted)
    return {name: value for name, value in found if value}


def trimmed_url(url: str) -> str:
    """
    Reduce a URL to its lower-cased ``scheme://netloc/path`` form

    Once search terms have been extracted, the query string and fragment
    add little value but cost a lot of database space, so both are
    discarded here.

    :param url: URL to normalize
    :return: lower-cased URL without query or fragment, with ``http``
        rewritten to ``https``
    """
    parts = urlparse(url.lower())
    host = parts.netloc
    path = parts.path

    # Google sometimes embeds its parameters directly in the path with no
    # '?' delimiter, for example
    # https://www.google.com/xjs/_/js/k=xjs.s.en_US.85JIVN_kbes.O/m=sy30
    # Other sites may do the same, but Google is common enough to be
    # worth special-casing: keep only what precedes the first '='.
    if len(path) > 20 and 'google.' in host:
        path = path.partition('=')[0]

    # Store everything as https so the database doesn't end up with two
    # rows for URLs that differ only by http vs https.
    scheme = parts.scheme
    if scheme == 'http':
        scheme = 'https'

    return f'{scheme}://{host}{path}'


# Names exported by a wildcard import of this module
__all__ = (
    'pagination_params',
    'trimmed_url',
)
