search.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. from django.db.models import (
  2. CharField,
  3. Expression,
  4. Field,
  5. FloatField,
  6. Func,
  7. Lookup,
  8. TextField,
  9. Value,
  10. )
  11. from django.db.models.expressions import CombinedExpression, register_combinable_fields
  12. from django.db.models.functions import Cast, Coalesce
  13. class SearchVectorExact(Lookup):
  14. lookup_name = "exact"
  15. def process_rhs(self, qn, connection):
  16. if not isinstance(self.rhs, (SearchQuery, CombinedSearchQuery)):
  17. config = getattr(self.lhs, "config", None)
  18. self.rhs = SearchQuery(self.rhs, config=config)
  19. rhs, rhs_params = super().process_rhs(qn, connection)
  20. return rhs, rhs_params
  21. def as_sql(self, qn, connection):
  22. lhs, lhs_params = self.process_lhs(qn, connection)
  23. rhs, rhs_params = self.process_rhs(qn, connection)
  24. params = lhs_params + rhs_params
  25. return "%s @@ %s" % (lhs, rhs), params
class SearchVectorField(Field):
    """Field whose database column type is ``tsvector``."""

    def db_type(self, connection):
        # The connection is not consulted; the type name is fixed.
        return "tsvector"
class SearchQueryField(Field):
    """Field whose database column type is ``tsquery``."""

    def db_type(self, connection):
        # The connection is not consulted; the type name is fixed.
        return "tsquery"
class _Float4Field(Field):
    """Private field whose database column type is ``float4``."""

    def db_type(self, connection):
        # The connection is not consulted; the type name is fixed.
        return "float4"
  35. class SearchConfig(Expression):
  36. def __init__(self, config):
  37. super().__init__()
  38. if not hasattr(config, "resolve_expression"):
  39. config = Value(config)
  40. self.config = config
  41. @classmethod
  42. def from_parameter(cls, config):
  43. if config is None or isinstance(config, cls):
  44. return config
  45. return cls(config)
  46. def get_source_expressions(self):
  47. return [self.config]
  48. def set_source_expressions(self, exprs):
  49. (self.config,) = exprs
  50. def as_sql(self, compiler, connection):
  51. sql, params = compiler.compile(self.config)
  52. return "%s::regconfig" % sql, params
  53. class SearchVectorCombinable:
  54. ADD = "||"
  55. def _combine(self, other, connector, reversed):
  56. if not isinstance(other, SearchVectorCombinable):
  57. raise TypeError(
  58. "SearchVector can only be combined with other SearchVector "
  59. "instances, got %s." % type(other).__name__
  60. )
  61. if reversed:
  62. return CombinedSearchVector(other, connector, self, self.config)
  63. return CombinedSearchVector(self, connector, other, self.config)
# Register the (SearchVectorField, ADD connector, SearchVectorField)
# combination with SearchVectorField as the final argument.
# NOTE(review): the final argument is presumably the resulting output field
# type -- confirm against register_combinable_fields' signature.
register_combinable_fields(
    SearchVectorField, SearchVectorCombinable.ADD, SearchVectorField, SearchVectorField
)
  67. class SearchVector(SearchVectorCombinable, Func):
  68. function = "to_tsvector"
  69. arg_joiner = " || ' ' || "
  70. output_field = SearchVectorField()
  71. def __init__(self, *expressions, config=None, weight=None):
  72. super().__init__(*expressions)
  73. self.config = SearchConfig.from_parameter(config)
  74. if weight is not None and not hasattr(weight, "resolve_expression"):
  75. weight = Value(weight)
  76. self.weight = weight
  77. def resolve_expression(
  78. self, query=None, allow_joins=True, reuse=None, summarize=False, for_save=False
  79. ):
  80. resolved = super().resolve_expression(
  81. query, allow_joins, reuse, summarize, for_save
  82. )
  83. if self.config:
  84. resolved.config = self.config.resolve_expression(
  85. query, allow_joins, reuse, summarize, for_save
  86. )
  87. return resolved
  88. def as_sql(self, compiler, connection, function=None, template=None):
  89. clone = self.copy()
  90. clone.set_source_expressions(
  91. [
  92. Coalesce(
  93. expression
  94. if isinstance(expression.output_field, (CharField, TextField))
  95. else Cast(expression, TextField()),
  96. Value(""),
  97. )
  98. for expression in clone.get_source_expressions()
  99. ]
  100. )
  101. config_sql = None
  102. config_params = []
  103. if template is None:
  104. if clone.config:
  105. config_sql, config_params = compiler.compile(clone.config)
  106. template = "%(function)s(%(config)s, %(expressions)s)"
  107. else:
  108. template = clone.template
  109. sql, params = super(SearchVector, clone).as_sql(
  110. compiler,
  111. connection,
  112. function=function,
  113. template=template,
  114. config=config_sql,
  115. )
  116. extra_params = []
  117. if clone.weight:
  118. weight_sql, extra_params = compiler.compile(clone.weight)
  119. sql = "setweight({}, {})".format(sql, weight_sql)
  120. return sql, config_params + params + extra_params
class CombinedSearchVector(SearchVectorCombinable, CombinedExpression):
    """Combined expression of two search vectors that also carries a config."""

    def __init__(self, lhs, connector, rhs, config, output_field=None):
        # Keep the config on the instance; the remaining arguments are passed
        # to the parent initializer unchanged.
        self.config = config
        super().__init__(lhs, connector, rhs, output_field)
  125. class SearchQueryCombinable:
  126. BITAND = "&&"
  127. BITOR = "||"
  128. def _combine(self, other, connector, reversed):
  129. if not isinstance(other, SearchQueryCombinable):
  130. raise TypeError(
  131. "SearchQuery can only be combined with other SearchQuery "
  132. "instances, got %s." % type(other).__name__
  133. )
  134. if reversed:
  135. return CombinedSearchQuery(other, connector, self, self.config)
  136. return CombinedSearchQuery(self, connector, other, self.config)
  137. # On Combinable, these are not implemented to reduce confusion with Q. In
  138. # this case we are actually (ab)using them to do logical combination so
  139. # it's consistent with other usage in Django.
  140. def __or__(self, other):
  141. return self._combine(other, self.BITOR, False)
  142. def __ror__(self, other):
  143. return self._combine(other, self.BITOR, True)
  144. def __and__(self, other):
  145. return self._combine(other, self.BITAND, False)
  146. def __rand__(self, other):
  147. return self._combine(other, self.BITAND, True)
  148. class SearchQuery(SearchQueryCombinable, Func):
  149. output_field = SearchQueryField()
  150. SEARCH_TYPES = {
  151. "plain": "plainto_tsquery",
  152. "phrase": "phraseto_tsquery",
  153. "raw": "to_tsquery",
  154. "websearch": "websearch_to_tsquery",
  155. }
  156. def __init__(
  157. self,
  158. value,
  159. output_field=None,
  160. *,
  161. config=None,
  162. invert=False,
  163. search_type="plain",
  164. ):
  165. self.function = self.SEARCH_TYPES.get(search_type)
  166. if self.function is None:
  167. raise ValueError("Unknown search_type argument '%s'." % search_type)
  168. if not hasattr(value, "resolve_expression"):
  169. value = Value(value)
  170. expressions = (value,)
  171. self.config = SearchConfig.from_parameter(config)
  172. if self.config is not None:
  173. expressions = (self.config,) + expressions
  174. self.invert = invert
  175. super().__init__(*expressions, output_field=output_field)
  176. def as_sql(self, compiler, connection, function=None, template=None):
  177. sql, params = super().as_sql(compiler, connection, function, template)
  178. if self.invert:
  179. sql = "!!(%s)" % sql
  180. return sql, params
  181. def __invert__(self):
  182. clone = self.copy()
  183. clone.invert = not self.invert
  184. return clone
  185. def __str__(self):
  186. result = super().__str__()
  187. return ("~%s" % result) if self.invert else result
  188. class CombinedSearchQuery(SearchQueryCombinable, CombinedExpression):
  189. def __init__(self, lhs, connector, rhs, config, output_field=None):
  190. self.config = config
  191. super().__init__(lhs, connector, rhs, output_field)
  192. def __str__(self):
  193. return "(%s)" % super().__str__()
  194. class SearchRank(Func):
  195. function = "ts_rank"
  196. output_field = FloatField()
  197. def __init__(
  198. self,
  199. vector,
  200. query,
  201. weights=None,
  202. normalization=None,
  203. cover_density=False,
  204. ):
  205. from .fields.array import ArrayField
  206. if not hasattr(vector, "resolve_expression"):
  207. vector = SearchVector(vector)
  208. if not hasattr(query, "resolve_expression"):
  209. query = SearchQuery(query)
  210. expressions = (vector, query)
  211. if weights is not None:
  212. if not hasattr(weights, "resolve_expression"):
  213. weights = Value(weights)
  214. weights = Cast(weights, ArrayField(_Float4Field()))
  215. expressions = (weights,) + expressions
  216. if normalization is not None:
  217. if not hasattr(normalization, "resolve_expression"):
  218. normalization = Value(normalization)
  219. expressions += (normalization,)
  220. if cover_density:
  221. self.function = "ts_rank_cd"
  222. super().__init__(*expressions)
  223. class SearchHeadline(Func):
  224. function = "ts_headline"
  225. template = "%(function)s(%(expressions)s%(options)s)"
  226. output_field = TextField()
  227. def __init__(
  228. self,
  229. expression,
  230. query,
  231. *,
  232. config=None,
  233. start_sel=None,
  234. stop_sel=None,
  235. max_words=None,
  236. min_words=None,
  237. short_word=None,
  238. highlight_all=None,
  239. max_fragments=None,
  240. fragment_delimiter=None,
  241. ):
  242. if not hasattr(query, "resolve_expression"):
  243. query = SearchQuery(query)
  244. options = {
  245. "StartSel": start_sel,
  246. "StopSel": stop_sel,
  247. "MaxWords": max_words,
  248. "MinWords": min_words,
  249. "ShortWord": short_word,
  250. "HighlightAll": highlight_all,
  251. "MaxFragments": max_fragments,
  252. "FragmentDelimiter": fragment_delimiter,
  253. }
  254. self.options = {
  255. option: value for option, value in options.items() if value is not None
  256. }
  257. expressions = (expression, query)
  258. if config is not None:
  259. config = SearchConfig.from_parameter(config)
  260. expressions = (config,) + expressions
  261. super().__init__(*expressions)
  262. def as_sql(self, compiler, connection, function=None, template=None):
  263. options_sql = ""
  264. options_params = []
  265. if self.options:
  266. options_params.append(
  267. ", ".join(
  268. connection.ops.compose_sql(f"{option}=%s", [value])
  269. for option, value in self.options.items()
  270. )
  271. )
  272. options_sql = ", %s"
  273. sql, params = super().as_sql(
  274. compiler,
  275. connection,
  276. function=function,
  277. template=template,
  278. options=options_sql,
  279. )
  280. return sql, params + options_params
# Attach the "exact" lookup defined by SearchVectorExact to SearchVectorField.
SearchVectorField.register_lookup(SearchVectorExact)
  282. class TrigramBase(Func):
  283. output_field = FloatField()
  284. def __init__(self, expression, string, **extra):
  285. if not hasattr(string, "resolve_expression"):
  286. string = Value(string)
  287. super().__init__(expression, string, **extra)
  288. class TrigramWordBase(Func):
  289. output_field = FloatField()
  290. def __init__(self, string, expression, **extra):
  291. if not hasattr(string, "resolve_expression"):
  292. string = Value(string)
  293. super().__init__(string, expression, **extra)
class TrigramSimilarity(TrigramBase):
    """``TrigramBase`` expression rendered with the ``SIMILARITY`` function."""

    function = "SIMILARITY"
class TrigramDistance(TrigramBase):
    """``TrigramBase`` expression that joins its arguments with ``<->``."""

    # No function name: the operator in arg_joiner does the work.
    function = ""
    arg_joiner = " <-> "
class TrigramWordDistance(TrigramWordBase):
    """``TrigramWordBase`` expression that joins its arguments with ``<<->``."""

    # No function name: the operator in arg_joiner does the work.
    function = ""
    arg_joiner = " <<-> "
class TrigramStrictWordDistance(TrigramWordBase):
    """``TrigramWordBase`` expression that joins its arguments with ``<<<->``."""

    # No function name: the operator in arg_joiner does the work.
    function = ""
    arg_joiner = " <<<-> "
class TrigramWordSimilarity(TrigramWordBase):
    """``TrigramWordBase`` expression rendered with ``WORD_SIMILARITY``."""

    function = "WORD_SIMILARITY"
class TrigramStrictWordSimilarity(TrigramWordBase):
    """``TrigramWordBase`` expression rendered with ``STRICT_WORD_SIMILARITY``."""

    function = "STRICT_WORD_SIMILARITY"