import re
import requests
from typing import List, Optional, Dict
from dataclasses import dataclass, field

def snake_to_camel(snake_str: str) -> str:
    """Turn a snake_case identifier into its camelCase equivalent.

    The text before the first underscore is kept verbatim; every following
    underscore-separated piece is title-cased and appended to it.

    Args:
        snake_str (str): Identifier written in snake_case.

    Returns:
        str: The camelCase form of ``snake_str``.
    """
    # str.split always yields at least one element, so the unpacking is safe.
    head, *tail = snake_str.split("_")
    return head + "".join(map(str.title, tail))

def to_camel_case(data: dict) -> dict:
    """Build a copy of ``data`` whose keys are camelCase.

    Entries whose value is ``None`` are left out of the result.

    Args:
        data (dict): Mapping with snake_case keys.

    Returns:
        dict: New mapping with camelCase keys and the non-``None`` values.
    """
    converted = {}
    for key, value in data.items():
        if value is None:
            # Unset options are omitted rather than forwarded as nulls.
            continue
        converted[snake_to_camel(key)] = value
    return converted

def camel_to_snake(camel_str: str) -> str:
    """Turn a camelCase identifier into its snake_case equivalent.

    Args:
        camel_str (str): Identifier written in camelCase.

    Returns:
        str: The lower-cased, underscore-separated form of ``camel_str``.
    """
    # Pass 1: insert "_" before a capital letter that begins a lowercase run.
    with_word_breaks = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", camel_str)
    # Pass 2: insert "_" between a lowercase letter or digit and a capital.
    fully_split = re.sub("([a-z0-9])([A-Z])", r"\1_\2", with_word_breaks)
    return fully_split.lower()

def to_snake_case(data: dict) -> dict:
    """Build a copy of ``data`` whose keys are snake_case.

    Values are carried over unchanged, including ``None`` values.

    Args:
        data (dict): Mapping with camelCase keys.

    Returns:
        dict: New mapping with snake_case keys.
    """
    converted = {}
    for key, value in data.items():
        converted[camel_to_snake(key)] = value
    return converted

# Options accepted by a search request, mapped to the Python type each
# supplied value must be an instance of.
SEARCH_OPTIONS_TYPES = {
    # Declarative suggestion for search.
    "query": str,
    # Number of results (Default: 10, Max for basic: 10).
    "num_results": int,
    # Domains to search from; exclusive with 'exclude_domains'.
    "include_domains": list,
    # Domains to omit; exclusive with 'include_domains'.
    "exclude_domains": list,
    # Results after this crawl date. ISO 8601 format.
    "start_crawl_date": str,
    # Results before this crawl date. ISO 8601 format.
    "end_crawl_date": str,
    # Results after this publish date; excludes links with no date.
    # ISO 8601 format.
    "start_published_date": str,
    # Results before this publish date; excludes links with no date.
    # ISO 8601 format.
    "end_published_date": str,
    # Convert query to Metaphor (Higher latency, Default: false).
    "use_autoprompt": bool,
    # 'keyword' or 'neural' (Default: neural). Choose 'neural' for
    # high-quality, semantically relevant content in popular domains.
    # 'keyword' is for specific, local, or obscure queries.
    "type": str,
}

# Options accepted by a find-similar request, mapped to the Python type each
# supplied value must be an instance of.
FIND_SIMILAR_OPTIONS_TYPES = {
    # The url for which you would like to find similar links.
    "url": str,
    # Number of search results to return.
    "num_results": int,
    # Domains to include in the search.
    "include_domains": list,
    # Domains to exclude from the search.
    "exclude_domains": list,
    # Only include links crawled after this date.
    "start_crawl_date": str,
    # Only include links crawled before this date.
    "end_crawl_date": str,
    # Only include links with a published date after this date.
    "start_published_date": str,
    # Only include links with a published date before this date.
    "end_published_date": str,
    # If true, exclude links from the base domain of the input URL.
    "exclude_source_domain": bool,
}

def validate_search_options(options: Dict[str, Optional[object]]) -> None:
    """Check search options against ``SEARCH_OPTIONS_TYPES``.

    Each option must be a known name, its value must be an instance of the
    type registered for that name, and the domain filter lists must not be
    empty.

    Args:
        options (Dict[str, Optional[object]]): The search options to validate.

    Raises:
        ValueError: If an option name is unknown, a value has the wrong
            type, or a domain filter list is empty.
    """
    domain_filters = ('include_domains', 'exclude_domains')
    for name, supplied in options.items():
        if name not in SEARCH_OPTIONS_TYPES:
            raise ValueError(f"Invalid option: '{name}'")
        expected_type = SEARCH_OPTIONS_TYPES[name]
        if not isinstance(supplied, expected_type):
            raise ValueError(f"Invalid type for option '{name}': Expected {expected_type}, got {type(supplied)}")
        if name in domain_filters and not supplied:
            raise ValueError(f"Invalid value for option '{name}': cannot be an empty list")

def validate_find_similar_options(options: Dict[str, Optional[object]]) -> None:
    """Check find-similar options against ``FIND_SIMILAR_OPTIONS_TYPES``.

    Each option must be a known name, its value must be an instance of the
    type registered for that name, and the domain filter lists must not be
    empty.

    Args:
        options (Dict[str, Optional[object]]): The find similar options to validate.

    Raises:
        ValueError: If an option name is unknown, a value has the wrong
            type, or a domain filter list is empty.
    """
    domain_filters = ('include_domains', 'exclude_domains')
    for name, supplied in options.items():
        if name not in FIND_SIMILAR_OPTIONS_TYPES:
            raise ValueError(f"Invalid option: '{name}'")
        expected_type = FIND_SIMILAR_OPTIONS_TYPES[name]
        if not isinstance(supplied, expected_type):
            raise ValueError(f"Invalid type for option '{name}': Expected {expected_type}, got {type(supplied)}")
        if name in domain_filters and not supplied:
            raise ValueError(f"Invalid value for option '{name}': cannot be an empty list")

@dataclass
class Result:
    """A class representing a search result.

    Attributes:
        title (str): The title of the search result.
        url (str): The URL of the search result.
        id (str): The temporary ID for the document.
        score (float, optional): A number from 0 to 1 representing similarity between the query/url and the result.
        published_date (str, optional): An estimate of the creation date, from parsing HTML content.
        author (str, optional): If available, the author of the content.
        extract (str, optional): Extract text for the result when one is supplied; otherwise None.
    """
    title: str
    url: str
    id: str
    score: Optional[float] = None
    published_date: Optional[str] = None
    author: Optional[str] = None
    extract: Optional[str] = None

    def __init__(self, title: str, url: str, id: str, score: Optional[float] = None, published_date: Optional[str] = None, author: Optional[str] = None, extract: Optional[str] = None, **kwargs):
        """Create a result, ignoring any keyword arguments it does not know.

        Args:
            title (str): The title of the search result.
            url (str): The URL of the search result.
            id (str): The temporary ID for the document.
            score (float, optional): Similarity score for the result.
            published_date (str, optional): Estimated creation date.
            author (str, optional): Author of the content, if available.
            extract (str, optional): Extract text for the result, if available.
            **kwargs: Extra fields; accepted and discarded so that unknown
                keys in the source mapping do not make construction fail.
        """
        self.title = title
        self.url = url
        self.score = score
        self.id = id
        self.published_date = published_date
        self.author = author
        # Previously 'extract' fell into **kwargs and was dropped, so the
        # attribute always read as the class-level default of None.
        self.extract = extract

    def __str__(self):
        """Return a multi-line, human-readable rendering of the result."""
        return (f"Title: {self.title}\n"
                f"URL: {self.url}\n"
                f"ID: {self.id}\n"
                f"Score: {self.score}\n"
                f"Published Date: {self.published_date}\n"
                f"Author: {self.author}\n"
                f"Extract: {self.extract}")

@dataclass
class DocumentContent:
    """A class representing the content of a document.

    Attributes:
        id (str): The ID of the document.
        url (str): The URL of the document.
        title (str): The title of the document.
        extract (str): The first 1000 tokens of content in the document.
        author (str, optional): If available, the author of the content.
    """
    id: str
    url: str
    title: str
    extract: str
    author: Optional[str] = None

    def __init__(self, id: str, url: str, title: str, extract: str, author: Optional[str] = None, **kwargs):
        """Create a document content record, ignoring unknown keyword arguments.

        Args:
            id (str): The ID of the document.
            url (str): The URL of the document.
            title (str): The title of the document.
            extract (str): The extracted content of the document.
            author (str, optional): Author of the content, if available.
            **kwargs: Extra fields; accepted and discarded so that unknown
                keys in the source mapping do not make construction fail.
        """
        self.id = id
        self.url = url
        self.title = title
        self.extract = extract
        self.author = author

    def __str__(self):
        """Return a multi-line, human-readable rendering of the document."""
        # The "Extract" line needs its own trailing newline; without it the
        # "Author" label was glued onto the end of the extract text.
        return (f"ID: {self.id}\n"
                f"URL: {self.url}\n"
                f"Title: {self.title}\n"
                f"Extract: {self.extract}\n"
                f"Author: {self.author}")

@dataclass
class GetContentsResponse:
    """Container for the documents returned by a get-contents call.

    Attributes:
        contents (List[DocumentContent]): The document contents, in the
            order they were supplied to the constructor.
    """
    contents: List[DocumentContent]

    def __str__(self):
        """Render every document, separating entries with a blank line."""
        rendered = map(str, self.contents)
        return "\n\n".join(rendered)

@dataclass
class SearchResponse:
    """Container for the results of a search or find-similar call.

    Attributes:
        results (List[Result]): The search results.
        autoprompt_string (str, optional): The Metaphor query created by the autoprompt functionality.
        api (Metaphor, optional): Client used by ``get_contents``. It is not
            a constructor argument; it defaults to None and is assigned
            after construction.
    """
    results: List[Result]
    autoprompt_string: Optional[str] = None
    api: Optional['Metaphor'] = field(default=None, init=False)

    def get_contents(self):
        """Fetch the document contents for every result in this response.

        Returns:
            GetContentsResponse: Whatever the attached client's
            ``get_contents`` returns for the collected result IDs.

        Raises:
            Exception: If no API client is attached to this response.
        """
        if self.api is None:
            raise Exception("API client is not set. This method should be called on a SearchResponse returned by the 'search' method of 'Metaphor'.")
        document_ids = []
        for hit in self.results:
            document_ids.append(hit.id)
        return self.api.get_contents(document_ids)

    def __str__(self):
        """Render all results, followed by the autoprompt string if present."""
        rendered = "\n\n".join(map(str, self.results))
        if not self.autoprompt_string:
            return rendered
        return rendered + f"\n\nAutoprompt String: {self.autoprompt_string}"

class Metaphor:
    """A client for interacting with the Metaphor Search API.

    Attributes:
        base_url (str): The base URL for the Metaphor API.
        headers (dict): The headers to include in API requests.
    """

    def __init__(self, api_key: str, base_url: str = "https://api.metaphor.systems", user_agent: str = "metaphor-python 0.1.21"):
        """Initialize the Metaphor client with the provided API key and optional base URL and user agent.

        Args:
            api_key (str): The API key for authenticating with the Metaphor API.
            base_url (str, optional): The base URL for the Metaphor API. Defaults to "https://api.metaphor.systems".
            user_agent (str, optional): The user agent string to use for requests. Defaults to "metaphor-python 0.1.21".
        """
        self.base_url = base_url
        self.headers = {"x-api-key": api_key, "User-Agent": user_agent}

    @staticmethod
    def _parse_response(response):
        """Return the decoded JSON body of ``response``, raising on any non-200 status."""
        if response.status_code != 200:
            raise Exception(f"Request failed with status code {response.status_code}. Message: {response.text}")
        return response.json()

    def search(self, query: str, num_results: Optional[int] = None, include_domains: Optional[List[str]] = None,
               exclude_domains: Optional[List[str]] = None, start_crawl_date: Optional[str] = None,
               end_crawl_date: Optional[str] = None, start_published_date: Optional[str] = None,
               end_published_date: Optional[str] = None, use_autoprompt: Optional[bool] = None,
               type: Optional[str] = None) -> SearchResponse:
        """Perform a search with a Metaphor prompt-engineered query and retrieve a list of relevant results.

        Args:
            query (str): The query string.
            num_results (int, optional): Number of search results to return. Defaults to 10.
            include_domains (List[str], optional): List of domains to include in the search.
            exclude_domains (List[str], optional): List of domains to exclude in the search.
            start_crawl_date (str, optional): Results will only include links crawled after this date.
            end_crawl_date (str, optional): Results will only include links crawled before this date.
            start_published_date (str, optional): Results will only include links with a published date after this date.
            end_published_date (str, optional): Results will only include links with a published date before this date.
            use_autoprompt (bool, optional): If true, convert query to a Metaphor query. Defaults to False.
            type (str, optional): The type of search, 'keyword' or 'neural'. Defaults to "neural".

        Returns:
            SearchResponse: The response containing search results and optional autoprompt string.

        Raises:
            ValueError: If an option fails validation.
            Exception: If the API responds with a status code other than 200.
        """
        # Spell the options out instead of harvesting locals(): the payload
        # then cannot pick up unrelated local variables by accident.
        candidates = {
            'query': query,
            'num_results': num_results,
            'include_domains': include_domains,
            'exclude_domains': exclude_domains,
            'start_crawl_date': start_crawl_date,
            'end_crawl_date': end_crawl_date,
            'start_published_date': start_published_date,
            'end_published_date': end_published_date,
            'use_autoprompt': use_autoprompt,
            'type': type,
        }
        options = {k: v for k, v in candidates.items() if v is not None}
        validate_search_options(options)
        request = {'query': query}
        request.update(to_camel_case(options))
        response = requests.post(f"{self.base_url}/search", json=request, headers=self.headers)
        # Decode the body once and reuse it for both fields read below.
        payload = self._parse_response(response)
        results = [Result(**to_snake_case(result)) for result in payload["results"]]
        search_response = SearchResponse(results=results, autoprompt_string=payload.get("autopromptString"))
        # 'api' is not a constructor argument of SearchResponse; attach it
        # afterwards so the response can fetch contents through this client.
        search_response.api = self
        return search_response

    def find_similar(self, url: str, num_results: Optional[int] = None, include_domains: Optional[List[str]] = None,
                     exclude_domains: Optional[List[str]] = None, start_crawl_date: Optional[str] = None,
                     end_crawl_date: Optional[str] = None, start_published_date: Optional[str] = None,
                     end_published_date: Optional[str] = None, exclude_source_domain:Optional[bool] = None) -> SearchResponse:
        """Find similar links to the link provided.

        Args:
            url (str): The URL for which to find similar links.
            num_results (int, optional): Number of search results to return. Defaults to 10.
            include_domains (List[str], optional): List of domains to include in the search.
            exclude_domains (List[str], optional): List of domains to exclude in the search.
            start_crawl_date (str, optional): Results will only include links crawled after this date.
            end_crawl_date (str, optional): Results will only include links crawled before this date.
            start_published_date (str, optional): Results will only include links with a published date after this date.
            end_published_date (str, optional): Results will only include links with a published date before this date.
            exclude_source_domain (bool, optional): If true, exclude links from the base domain of the input URL. Defaults to True.

        Returns:
            SearchResponse: The response containing search results.

        Raises:
            ValueError: If an option fails validation.
            Exception: If the API responds with a status code other than 200.
        """
        # Explicit mapping rather than locals(); see the same note in search().
        candidates = {
            'url': url,
            'num_results': num_results,
            'include_domains': include_domains,
            'exclude_domains': exclude_domains,
            'start_crawl_date': start_crawl_date,
            'end_crawl_date': end_crawl_date,
            'start_published_date': start_published_date,
            'end_published_date': end_published_date,
            'exclude_source_domain': exclude_source_domain,
        }
        options = {k: v for k, v in candidates.items() if v is not None}
        validate_find_similar_options(options)
        request = {'url': url}
        request.update(to_camel_case(options))
        response = requests.post(f"{self.base_url}/findSimilar", json=request, headers=self.headers)
        payload = self._parse_response(response)
        results = [Result(**to_snake_case(result)) for result in payload["results"]]
        find_similar_response = SearchResponse(results=results)
        find_similar_response.api = self
        return find_similar_response

    def get_contents(self, ids: List[str]) -> GetContentsResponse:
        """Retrieve contents of documents based on a list of document IDs.

        Args:
            ids (List[str]): An array of document IDs obtained from either /search or /findSimilar endpoints.

        Returns:
            GetContentsResponse: The response containing document contents.
            An empty response is returned without any request when ``ids``
            is empty.

        Raises:
            Exception: If the API responds with a status code other than 200.
        """
        if not ids:
            # Nothing to look up, so skip the HTTP round trip.
            return GetContentsResponse([])
        response = requests.get(f"{self.base_url}/contents", params={"ids": ids}, headers=self.headers)
        payload = self._parse_response(response)
        return GetContentsResponse([DocumentContent(**to_snake_case(document)) for document in payload["contents"]])