forked from mirrors/catstodon
Change account search tokenizer and queries (#26378)
This commit is contained in:
parent
60fbb0fe91
commit
72423bc8f6
2 changed files with 144 additions and 89 deletions
|
@ -33,7 +33,7 @@ class AccountsIndex < Chewy::Index
|
|||
},
|
||||
|
||||
verbatim: {
|
||||
tokenizer: 'whitespace',
|
||||
tokenizer: 'standard',
|
||||
filter: %w(lowercase asciifolding cjk_width),
|
||||
},
|
||||
|
||||
|
|
|
@ -8,6 +8,143 @@ class AccountSearchService < BaseService
|
|||
# Min. number of characters to look for non-exact matches
|
||||
MIN_QUERY_LENGTH = 5
|
||||
|
||||
class QueryBuilder
|
||||
def initialize(query, account, options = {})
|
||||
@query = query
|
||||
@account = account
|
||||
@options = options
|
||||
end
|
||||
|
||||
def build
|
||||
AccountsIndex.query(
|
||||
bool: {
|
||||
must: {
|
||||
function_score: {
|
||||
query: {
|
||||
bool: {
|
||||
must: must_clauses,
|
||||
},
|
||||
},
|
||||
|
||||
functions: [
|
||||
reputation_score_function,
|
||||
followers_score_function,
|
||||
time_distance_function,
|
||||
],
|
||||
},
|
||||
},
|
||||
|
||||
should: should_clauses,
|
||||
}
|
||||
)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def must_clauses
|
||||
if @account && @options[:following]
|
||||
[core_query, only_following_query]
|
||||
else
|
||||
[core_query]
|
||||
end
|
||||
end
|
||||
|
||||
def should_clauses
|
||||
if @account && !@options[:following]
|
||||
[boost_following_query]
|
||||
else
|
||||
[]
|
||||
end
|
||||
end
|
||||
|
||||
# This function limits results to only the accounts the user is following
|
||||
def only_following_query
|
||||
{
|
||||
terms: {
|
||||
id: following_ids,
|
||||
},
|
||||
}
|
||||
end
|
||||
|
||||
# This function promotes accounts the user is following
|
||||
def boost_following_query
|
||||
{
|
||||
terms: {
|
||||
id: following_ids,
|
||||
boost: 100,
|
||||
},
|
||||
}
|
||||
end
|
||||
|
||||
# This function deranks accounts that follow more people than follow them
|
||||
def reputation_score_function
|
||||
{
|
||||
script_score: {
|
||||
script: {
|
||||
source: "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + Math.max(doc['following_count'].value, 0) + 1)",
|
||||
},
|
||||
},
|
||||
}
|
||||
end
|
||||
|
||||
# This function promotes accounts that have more followers
|
||||
def followers_score_function
|
||||
{
|
||||
script_score: {
|
||||
script: {
|
||||
source: "(Math.max(doc['followers_count'].value, 0) / (Math.max(doc['followers_count'].value, 0) + 1))",
|
||||
},
|
||||
},
|
||||
}
|
||||
end
|
||||
|
||||
# This function deranks accounts that haven't posted in a long time
|
||||
def time_distance_function
|
||||
{
|
||||
gauss: {
|
||||
last_status_at: {
|
||||
scale: '30d',
|
||||
offset: '30d',
|
||||
decay: 0.3,
|
||||
},
|
||||
},
|
||||
}
|
||||
end
|
||||
|
||||
def following_ids
|
||||
@following_ids ||= @account.active_relationships.pluck(:target_account_id) + [@account.id]
|
||||
end
|
||||
end
|
||||
|
||||
class AutocompleteQueryBuilder < QueryBuilder
|
||||
private
|
||||
|
||||
def core_query
|
||||
{
|
||||
multi_match: {
|
||||
query: @query,
|
||||
type: 'bool_prefix',
|
||||
fields: %w(username username.* display_name display_name.*),
|
||||
},
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
class FullQueryBuilder < QueryBuilder
|
||||
private
|
||||
|
||||
def core_query
|
||||
{
|
||||
multi_match: {
|
||||
query: @query,
|
||||
type: 'most_fields',
|
||||
fields: %w(username^2 display_name^2 text text.*),
|
||||
operator: 'and',
|
||||
},
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
def call(query, account = nil, options = {})
|
||||
@query = query&.strip&.gsub(/\A@/, '')
|
||||
@limit = options[:limit].to_i
|
||||
|
@ -71,27 +208,15 @@ class AccountSearchService < BaseService
|
|||
end
|
||||
|
||||
def from_elasticsearch
|
||||
must_clauses = must_clause
|
||||
should_clauses = should_clause
|
||||
|
||||
if account
|
||||
return [] if options[:following] && following_ids.empty?
|
||||
|
||||
if options[:following]
|
||||
must_clauses << { terms: { id: following_ids } }
|
||||
elsif following_ids.any?
|
||||
should_clauses << { terms: { id: following_ids, boost: 100 } }
|
||||
query_builder = begin
|
||||
if options[:use_searchable_text]
|
||||
FullQueryBuilder.new(terms_for_query, account, options.slice(:following))
|
||||
else
|
||||
AutocompleteQueryBuilder.new(terms_for_query, account, options.slice(:following))
|
||||
end
|
||||
end
|
||||
|
||||
query = { bool: { must: must_clauses, should: should_clauses } }
|
||||
functions = [reputation_score_function, followers_score_function, time_distance_function]
|
||||
|
||||
records = AccountsIndex.query(function_score: { query: query, functions: functions })
|
||||
.limit(limit_for_non_exact_results)
|
||||
.offset(offset)
|
||||
.objects
|
||||
.compact
|
||||
records = query_builder.build.limit(limit_for_non_exact_results).offset(offset).objects.compact
|
||||
|
||||
ActiveRecord::Associations::Preloader.new(records: records, associations: :account_stat)
|
||||
|
||||
|
@ -100,76 +225,6 @@ class AccountSearchService < BaseService
|
|||
nil
|
||||
end
|
||||
|
||||
def reputation_score_function
|
||||
{
|
||||
script_score: {
|
||||
script: {
|
||||
source: "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + Math.max(doc['following_count'].value, 0) + 1)",
|
||||
},
|
||||
},
|
||||
}
|
||||
end
|
||||
|
||||
def followers_score_function
|
||||
{
|
||||
script_score: {
|
||||
script: {
|
||||
source: "Math.log10(Math.max(doc['followers_count'].value, 0) + 2)",
|
||||
},
|
||||
},
|
||||
}
|
||||
end
|
||||
|
||||
def time_distance_function
|
||||
{
|
||||
gauss: {
|
||||
last_status_at: {
|
||||
scale: '30d',
|
||||
offset: '30d',
|
||||
decay: 0.3,
|
||||
},
|
||||
},
|
||||
}
|
||||
end
|
||||
|
||||
def must_clause
|
||||
if options[:start_with_hashtag]
|
||||
fields = %w(text text.*)
|
||||
else
|
||||
fields = %w(username username.* display_name display_name.*)
|
||||
fields << 'text' << 'text.*' if options[:use_searchable_text]
|
||||
end
|
||||
|
||||
[
|
||||
{
|
||||
multi_match: {
|
||||
query: terms_for_query,
|
||||
fields: fields,
|
||||
type: 'best_fields',
|
||||
operator: 'or',
|
||||
},
|
||||
},
|
||||
]
|
||||
end
|
||||
|
||||
def should_clause
|
||||
[
|
||||
{
|
||||
multi_match: {
|
||||
query: terms_for_query,
|
||||
fields: %w(username username.* display_name display_name.*),
|
||||
type: 'best_fields',
|
||||
operator: 'and',
|
||||
boost: 10,
|
||||
},
|
||||
},
|
||||
]
|
||||
end
|
||||
|
||||
def following_ids
|
||||
@following_ids ||= account.active_relationships.pluck(:target_account_id) + [account.id]
|
||||
end
|
||||
|
||||
def limit_for_non_exact_results
|
||||
return 0 if @account.nil? && query.size < MIN_QUERY_LENGTH
|
||||
|
||||
|
|
Loading…
Reference in a new issue