views.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. import datetime
  2. from dataclasses import dataclass
  3. from functools import wraps
  4. from django.contrib.sites.shortcuts import get_current_site
  5. from django.core.paginator import EmptyPage, PageNotAnInteger
  6. from django.http import Http404
  7. from django.template.response import TemplateResponse
  8. from django.urls import reverse
  9. from django.utils import timezone
  10. from django.utils.http import http_date
  11. @dataclass
  12. class SitemapIndexItem:
  13. location: str
  14. last_mod: bool = None
  15. def x_robots_tag(func):
  16. @wraps(func)
  17. def inner(request, *args, **kwargs):
  18. response = func(request, *args, **kwargs)
  19. response.headers["X-Robots-Tag"] = "noindex, noodp, noarchive"
  20. return response
  21. return inner
  22. def _get_latest_lastmod(current_lastmod, new_lastmod):
  23. """
  24. Returns the latest `lastmod` where `lastmod` can be either a date or a
  25. datetime.
  26. """
  27. if not isinstance(new_lastmod, datetime.datetime):
  28. new_lastmod = datetime.datetime.combine(new_lastmod, datetime.time.min)
  29. if timezone.is_naive(new_lastmod):
  30. new_lastmod = timezone.make_aware(new_lastmod, datetime.timezone.utc)
  31. return new_lastmod if current_lastmod is None else max(current_lastmod, new_lastmod)
  32. @x_robots_tag
  33. def index(
  34. request,
  35. sitemaps,
  36. template_name="sitemap_index.xml",
  37. content_type="application/xml",
  38. sitemap_url_name="django.contrib.sitemaps.views.sitemap",
  39. ):
  40. req_protocol = request.scheme
  41. req_site = get_current_site(request)
  42. sites = [] # all sections' sitemap URLs
  43. all_indexes_lastmod = True
  44. latest_lastmod = None
  45. for section, site in sitemaps.items():
  46. # For each section label, add links of all pages of its sitemap
  47. # (usually generated by the `sitemap` view).
  48. if callable(site):
  49. site = site()
  50. protocol = req_protocol if site.protocol is None else site.protocol
  51. sitemap_url = reverse(sitemap_url_name, kwargs={"section": section})
  52. absolute_url = "%s://%s%s" % (protocol, req_site.domain, sitemap_url)
  53. site_lastmod = site.get_latest_lastmod()
  54. if all_indexes_lastmod:
  55. if site_lastmod is not None:
  56. latest_lastmod = _get_latest_lastmod(latest_lastmod, site_lastmod)
  57. else:
  58. all_indexes_lastmod = False
  59. sites.append(SitemapIndexItem(absolute_url, site_lastmod))
  60. # Add links to all pages of the sitemap.
  61. for page in range(2, site.paginator.num_pages + 1):
  62. sites.append(
  63. SitemapIndexItem("%s?p=%s" % (absolute_url, page), site_lastmod)
  64. )
  65. # If lastmod is defined for all sites, set header so as
  66. # ConditionalGetMiddleware is able to send 304 NOT MODIFIED
  67. if all_indexes_lastmod and latest_lastmod:
  68. headers = {"Last-Modified": http_date(latest_lastmod.timestamp())}
  69. else:
  70. headers = None
  71. return TemplateResponse(
  72. request,
  73. template_name,
  74. {"sitemaps": sites},
  75. content_type=content_type,
  76. headers=headers,
  77. )
  78. @x_robots_tag
  79. def sitemap(
  80. request,
  81. sitemaps,
  82. section=None,
  83. template_name="sitemap.xml",
  84. content_type="application/xml",
  85. ):
  86. req_protocol = request.scheme
  87. req_site = get_current_site(request)
  88. if section is not None:
  89. if section not in sitemaps:
  90. raise Http404("No sitemap available for section: %r" % section)
  91. maps = [sitemaps[section]]
  92. else:
  93. maps = sitemaps.values()
  94. page = request.GET.get("p", 1)
  95. lastmod = None
  96. all_sites_lastmod = True
  97. urls = []
  98. for site in maps:
  99. try:
  100. if callable(site):
  101. site = site()
  102. urls.extend(site.get_urls(page=page, site=req_site, protocol=req_protocol))
  103. if all_sites_lastmod:
  104. site_lastmod = getattr(site, "latest_lastmod", None)
  105. if site_lastmod is not None:
  106. lastmod = _get_latest_lastmod(lastmod, site_lastmod)
  107. else:
  108. all_sites_lastmod = False
  109. except EmptyPage:
  110. raise Http404("Page %s empty" % page)
  111. except PageNotAnInteger:
  112. raise Http404("No page '%s'" % page)
  113. # If lastmod is defined for all sites, set header so as
  114. # ConditionalGetMiddleware is able to send 304 NOT MODIFIED
  115. if all_sites_lastmod:
  116. headers = {"Last-Modified": http_date(lastmod.timestamp())} if lastmod else None
  117. else:
  118. headers = None
  119. return TemplateResponse(
  120. request,
  121. template_name,
  122. {"urlset": urls},
  123. content_type=content_type,
  124. headers=headers,
  125. )