import re

import hircine.enums as enums
from hircine.scraper.types import (
    URL,
    Artist,
    Category,
    Censorship,
    Character,
    Circle,
    Date,
    Direction,
    Language,
    OriginalTitle,
    Rating,
    Tag,
    Title,
    World,
)
from hircine.scraper.utils import parse_dict


def sanitize(title, split=False):
    """Return *title* with bracketed annotations removed and spacing tidied.

    The following spans are deleted: ``[...]``, ``{...}``, ``=...=`` anywhere
    in the string, and a ``(...)`` group only when it opens the string.

    When *split* is true and a ``|`` remains after that cleanup, only the text
    following the first ``|`` is kept. Runs of two or more whitespace
    characters are then collapsed to a single space and the result is stripped.
    """
    stripped = re.sub(r"\[[^\]]+\]|{[^}]+}|=[^=]+=|^\([^)]+\)", "", title)

    if split:
        # partition() reports whether the separator was found, so the part
        # before the pipe never needs to be bound to a name.
        _, separator, remainder = stripped.partition("|")
        if separator:
            stripped = remainder

    collapsed = re.sub(r"\s{2,}", " ", stripped)
    return collapsed.strip()


class ExHentaiHandler:
    source = "exhentai"

    def scrape(self, data):
        category_field = "eh_category" if "eh_category" in data else "category"

        parsers = {
            category_field: self.parse_category,
            "posted": Date.from_timestamp,
            "date": Date.from_iso,
            "lang": Language.from_iso_639_3,
            "tags": self.parse_tag,
            "title": lambda t: Title(sanitize(t, split=True)),
            "title_jpn": lambda t: OriginalTitle(sanitize(t)),
        }

        self.is_likely_pornographic = True
        self.is_likely_rtl = False
        self.has_censorship_tag = False
        self.is_western = False

        yield from parse_dict(parsers, data)

        if self.is_likely_pornographic:
            yield Rating(enums.Rating.EXPLICIT)

            if not self.has_censorship_tag:
                if self.is_western:
                    yield Censorship(enums.Censorship.NONE)
                else:
                    yield Censorship(enums.Censorship.BAR)
        else:
            if not self.has_censorship_tag:
                yield Censorship(enums.Censorship.NONE)

        if self.is_likely_rtl:
            yield Direction(enums.Direction.RIGHT_TO_LEFT)

        if (gid := data["gid"]) and (token := data["token"]):
            yield URL(f"https://exhentai.org/g/{gid}/{token}")

    def parse_category(self, input):
        match input.lower():
            case "doujinshi":
                self.is_likely_rtl = True
                return Category(value=enums.Category.DOUJINSHI)
            case "manga":
                self.is_likely_rtl = True
                return Category(value=enums.Category.MANGA)
            case "western":
                self.is_western = True
            case "artist cg":
                return Category(value=enums.Category.COMIC)
            case "game cg":
                return Category(value=enums.Category.GAME_CG)
            case "image set":
                return Category(value=enums.Category.IMAGE_SET)
            case "non-h":
                self.is_likely_pornographic = False
                return Rating(value=enums.Rating.QUESTIONABLE)

    def parse_tag(self, input):
        match input.split(":"):
            case ["parody", value]:
                return World(value)
            case ["group", value]:
                return Circle(value)
            case ["artist", value]:
                return Artist(value)
            case ["character", value]:
                return Character(value)
            case ["language", value]:
                return self.parse_language(value, from_value=True)
            case ["other", "artbook"]:
                return Category(enums.Category.ARTBOOK)
            case ["other", "full censorship"]:
                self.has_censorship_tag = True
                return Censorship(enums.Censorship.FULL)
            case ["other", "mosaic censorship"]:
                self.has_censorship_tag = True
                return Censorship(enums.Censorship.MOSAIC)
            case ["other", "uncensored"]:
                self.has_censorship_tag = True
                return Censorship(enums.Censorship.NONE)
            case ["other", "non-h imageset" | "western imageset"]:
                return Category(value=enums.Category.IMAGE_SET)
            case ["other", "western non-h"]:
                self.is_likely_pornographic = False
                return Rating(value=enums.Rating.QUESTIONABLE)
            case ["other", "comic"]:
                return Category(value=enums.Category.COMIC)
            case ["other", "variant set"]:
                return Category(value=enums.Category.VARIANT_SET)
            case ["other", "webtoon"]:
                return Category(value=enums.Category.WEBTOON)
            case [namespace, tag]:
                return Tag(namespace=namespace, tag=tag)
            case [tag]:
                return Tag(namespace=None, tag=tag)

    def parse_language(self, input, from_value=False):
        if not input or input in ["translated", "speechless", "N/A"]:
            return

        return Language.from_name(input)
