240 lines
7.1 KiB
Python
240 lines
7.1 KiB
Python
"""Sitemap plugin for Reflex."""
|
|
|
|
import datetime
|
|
from collections.abc import Sequence
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from types import SimpleNamespace
|
|
from typing import TYPE_CHECKING, Literal, TypedDict
|
|
from xml.etree.ElementTree import Element, SubElement, indent, tostring
|
|
|
|
from typing_extensions import NotRequired
|
|
|
|
from reflex_base import constants
|
|
|
|
from .base import Plugin as PluginBase
|
|
|
|
if TYPE_CHECKING:
|
|
from reflex.app import UnevaluatedPage
|
|
|
|
# How trailing slashes on generated locations are treated by
# configuration_with_loc: "always" appends one when missing, "never" strips
# them, and any other value ("preserve") leaves the location untouched.
TrailingSlashOption = Literal["always", "never", "preserve"]

# Value types of the individual sitemap link fields.
Location = str
LastModified = datetime.datetime
# The change-frequency keywords accepted for a link.
ChangeFrequency = Literal[
    "always", "hourly", "daily", "weekly", "monthly", "yearly", "never"
]
# configuration_with_loc clamps this value into the range [0.0, 1.0].
Priority = float
|
|
|
|
|
|
class SitemapLink(TypedDict):
    """A link in the sitemap.

    Only ``loc`` is required; every other key may be omitted, in which case
    generate_xml does not emit the corresponding element.
    """

    # Location of the page; written to the <loc> element.
    loc: Location
    # Optional last-modification time; written to the <lastmod> element.
    lastmod: NotRequired[LastModified]
    # Optional change-frequency keyword; written to the <changefreq> element.
    changefreq: NotRequired[ChangeFrequency]
    # Optional priority; written to the <priority> element.
    priority: NotRequired[Priority]
|
|
|
|
|
|
class SitemapLinkConfiguration(TypedDict):
    """Configuration for a sitemap link.

    generate_links_for_sitemap reads this from a page's context under the
    "sitemap" key. Every key is optional.
    """

    # Explicit location; when absent, static routes fall back to the page route.
    loc: NotRequired[Location]
    # Optional last-modification time copied onto the resulting link.
    lastmod: NotRequired[LastModified]
    # Optional change-frequency keyword copied onto the resulting link.
    changefreq: NotRequired[ChangeFrequency]
    # Optional priority; clamped to [0.0, 1.0] by configuration_with_loc.
    priority: NotRequired[Priority]
|
|
|
|
|
|
class Constants(SimpleNamespace):
    """Fixed values used by the sitemap plugin."""

    # Path of the generated file: "sitemap.xml" under constants.Dirs.PUBLIC.
    FILE_PATH: Path = Path(constants.Dirs.PUBLIC).joinpath("sitemap.xml")
|
|
|
|
|
|
def configuration_with_loc(
    *,
    config: SitemapLinkConfiguration,
    deploy_url: str | None,
    loc: Location,
    trailing_slash: TrailingSlashOption,
) -> SitemapLink:
    """Build a sitemap link from a configuration and a resolved location.

    Args:
        config: The configuration dictionary supplying the optional fields.
        deploy_url: The deployment URL, if any. When set, it is prepended to
            locations that do not already start with ``http://`` or
            ``https://``.
        loc: The location to set.
        trailing_slash: Option for handling trailing slashes in URLs.

    Returns:
        A SitemapLink dictionary with the 'loc' field set, plus whichever of
        'lastmod', 'changefreq' and 'priority' are present in ``config``.
    """
    # Relative locations are joined to the deploy URL with exactly one slash.
    if deploy_url and not loc.startswith(("http://", "https://")):
        loc = "/".join((deploy_url.rstrip("/"), loc.lstrip("/")))

    if trailing_slash == "always":
        if not loc.endswith("/"):
            loc = f"{loc}/"
    elif trailing_slash == "never":
        # Keep the original value when stripping would leave nothing (e.g. "/").
        loc = loc.rstrip("/") or loc

    link: SitemapLink = {"loc": loc}

    lastmod = config.get("lastmod")
    if lastmod is not None:
        link["lastmod"] = lastmod

    changefreq = config.get("changefreq")
    if changefreq is not None:
        link["changefreq"] = changefreq

    priority = config.get("priority")
    if priority is not None:
        # Clamp into the [0.0, 1.0] range.
        link["priority"] = min(1.0, max(0.0, priority))

    return link
|
|
|
|
|
|
def generate_xml(links: Sequence[SitemapLink]) -> str:
    """Generate an XML sitemap from a list of links.

    Each link becomes a ``<url>`` element whose children are emitted in the
    order the sitemap schema declares them: ``loc``, ``lastmod``,
    ``changefreq``, ``priority``. Optional fields that are missing or ``None``
    are omitted.

    Args:
        links: A sequence of SitemapLink dictionaries.

    Returns:
        A pretty-printed XML string representing the sitemap.
    """
    # The namespace is an identifier, not a URL to fetch: the sitemap protocol
    # defines it with the "http" scheme, and the "https" spelling is a
    # different (unrecognised) namespace.
    urlset = Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")

    for link in links:
        url = SubElement(urlset, "url")

        loc_element = SubElement(url, "loc")
        loc_element.text = link["loc"]

        # <lastmod> must precede <changefreq> to satisfy the schema's sequence.
        if (lastmod := link.get("lastmod")) is not None:
            lastmod_element = SubElement(url, "lastmod")
            if isinstance(lastmod, datetime.datetime):
                lastmod = lastmod.isoformat()
            lastmod_element.text = lastmod

        if (changefreq := link.get("changefreq")) is not None:
            changefreq_element = SubElement(url, "changefreq")
            changefreq_element.text = changefreq

        if (priority := link.get("priority")) is not None:
            priority_element = SubElement(url, "priority")
            priority_element.text = str(priority)

    indent(urlset, " ")
    return tostring(urlset, encoding="utf-8", xml_declaration=True).decode("utf-8")
|
|
|
|
|
|
def is_route_dynamic(route: str) -> bool:
    """Tell whether a route contains dynamic segments.

    A route counts as dynamic when it contains both an opening and a closing
    square bracket, regardless of their positions.

    Args:
        route: The route to check.

    Returns:
        True if the route is dynamic, False otherwise.
    """
    return all(bracket in route for bracket in ("[", "]"))
|
|
|
|
|
|
def generate_links_for_sitemap(
    unevaluated_pages: Sequence["UnevaluatedPage"],
    trailing_slash: TrailingSlashOption,
) -> list[SitemapLink]:
    """Collect the sitemap links for a set of unevaluated pages.

    For each page the "sitemap" entry of its context is consulted:

    * An explicit ``None`` excludes the page.
    * Dynamic routes and the "404" route are included only when their
      configuration is non-empty and provides a 'loc'; a non-empty
      configuration without 'loc' produces a warning and the page is skipped.
    * Any other route uses the configured 'loc' if present, otherwise the
      route itself ("index" maps to "/"), prefixed with "/" when needed.

    Args:
        unevaluated_pages: Sequence of unevaluated pages.
        trailing_slash: Option for handling trailing slashes in URLs.

    Returns:
        A list of SitemapLink dictionaries.
    """
    from reflex_base.config import get_config
    from reflex_base.utils import console

    base_url = get_config().deploy_url
    collected: list[SitemapLink] = []

    for page in unevaluated_pages:
        page_config: SitemapLinkConfiguration | None = page.context.get(
            "sitemap", {}
        )
        if page_config is None:
            continue

        route = page.route
        dynamic = is_route_dynamic(route)

        if dynamic or route == "404":
            # These routes have no usable default location, so they need an
            # explicit configuration to appear in the sitemap at all.
            if not page_config:
                continue

            target = page_config.get("loc")
            if target is None:
                label = "Dynamic route" if dynamic else "Route 404"
                console.warn(
                    f"{label} '{route}' does not have a 'loc' in sitemap"
                    " configuration. Skipping."
                )
                continue
        else:
            target = page_config.get("loc")
            if target is None:
                # Derive the location from the route itself.
                target = "/" if route == "index" else route
                if not target.startswith("/"):
                    target = f"/{target}"

        collected.append(
            configuration_with_loc(
                config=page_config,
                deploy_url=base_url,
                loc=target,
                trailing_slash=trailing_slash,
            )
        )

    return collected
|
|
|
|
|
|
def sitemap_task(
    unevaluated_pages: Sequence["UnevaluatedPage"], trailing_slash: TrailingSlashOption
) -> tuple[str, str]:
    """Produce the sitemap file path together with its XML content.

    Args:
        unevaluated_pages: Sequence of unevaluated pages.
        trailing_slash: Option for handling trailing slashes in URLs.

    Returns:
        A tuple containing the file path and the generated XML content.
    """
    output_path = str(Constants.FILE_PATH)
    links = generate_links_for_sitemap(unevaluated_pages, trailing_slash)
    xml_content = generate_xml(links)
    return output_path, xml_content
|
|
|
|
|
|
@dataclass(kw_only=True, frozen=True)
class SitemapPlugin(PluginBase):
    """Reflex plugin that registers generation of the sitemap XML file."""

    # Trailing-slash policy forwarded to the sitemap task.
    trailing_slash: TrailingSlashOption = "preserve"

    def pre_compile(self, **context):
        """Register the sitemap save task before compilation.

        Args:
            context: The context for the plugin. Its "unevaluated_pages"
                entry (default: empty list) is passed to the task, and its
                "add_save_task" callable is used to register it.
        """
        pages = context.get("unevaluated_pages", [])
        register_save_task = context["add_save_task"]
        register_save_task(sitemap_task, pages, self.trailing_slash)
|
|
|
|
|
|
# Alias exposing the sitemap plugin under the generic name `Plugin`.
Plugin = SitemapPlugin
|