forked from mirrors/catstodon
Add advanced text formatting back into glitch-soc
This commit is contained in:
parent
f313bf3e97
commit
61cefbebf7
6 changed files with 459 additions and 29 deletions
|
@ -14,7 +14,7 @@ module FormattingHelper
|
|||
end
|
||||
|
||||
def status_content_format(status)
|
||||
html_aware_format(status.text, status.local?, preloaded_accounts: [status.account] + (status.respond_to?(:active_mentions) ? status.active_mentions.map(&:account) : []))
|
||||
html_aware_format(status.text, status.local?, preloaded_accounts: [status.account] + (status.respond_to?(:active_mentions) ? status.active_mentions.map(&:account) : []), content_type: status.content_type)
|
||||
end
|
||||
|
||||
def account_bio_format(account)
|
||||
|
|
131
app/lib/advanced_text_formatter.rb
Normal file
131
app/lib/advanced_text_formatter.rb
Normal file
|
@ -0,0 +1,131 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
# Formatter for statuses whose source is Markdown or HTML rather than plain
# text. Markdown input is rendered to HTML up front; the resulting HTML (or the
# HTML given as-is) then has URLs, hashtags and mentions linked, and is
# sanitized with `Sanitize::Config::MASTODON_OUTGOING` before being returned.
class AdvancedTextFormatter < TextFormatter
  # Redcarpet renderer that emits `<br/>`-separated code blocks and hands
  # autolinked URLs to the block given at construction time.
  class HTMLRenderer < Redcarpet::Render::HTML
    # @param [Hash] options Redcarpet render options
    # @yieldparam [String] url autolinked URL to turn into link markup
    def initialize(options, &block)
      super(options)
      @format_link = block
    end

    # Renders a fenced code block. The code is HTML-escaped explicitly via
    # `ERB::Util` (no bare `h` helper is defined in this class), and the
    # surrounding markup is not whitespace-squished so that indentation
    # inside `<pre>` is preserved. Newlines become `<br/>` because
    # `format_markdown` strips raw newlines afterwards.
    def block_code(code, _language)
      "<pre><code>#{ERB::Util.html_escape(code).gsub("\n", '<br/>')}</code></pre>"
    end

    # E-mail autolinks are returned untouched; every other autolink is
    # formatted by the block passed to the constructor.
    def autolink(link, link_type)
      return link if link_type == :email

      @format_link.call(link)
    end
  end

  # @param [String] text
  # @param [Hash] options
  # @option options [Boolean] :multiline
  # @option options [Boolean] :with_domains
  # @option options [Boolean] :with_rel_me
  # @option options [Array<Account>] :preloaded_accounts
  # @option options [String] :content_type
  def initialize(text, options = {})
    # Work on a copy: removing :content_type from the caller's hash would
    # change what the caller sees on any later use of the same options.
    options = options.dup
    content_type = options.delete(:content_type)
    super(text, options)

    @text = format_markdown(text) if content_type == 'text/markdown'
  end

  # Differs from TextFormatter by not messing with newline after parsing
  def to_s
    return ''.html_safe if text.blank?

    html = rewrite do |entity|
      if entity[:url]
        link_to_url(entity)
      elsif entity[:hashtag]
        link_to_hashtag(entity)
      elsif entity[:screen_name]
        link_to_mention(entity)
      end
    end

    html.html_safe # rubocop:disable Rails/OutputSafety
  end

  # Differs from `TextFormatter` by skipping HTML tags and entities
  #
  # Every HTML tag or numeric character reference is replaced by a single
  # space before extraction; `gaps` records, for each replacement, the position
  # right after it in the shortened string and the cumulative number of
  # characters removed so far, so that extracted indices can be shifted back
  # onto the original text.
  def entities
    @entities ||= begin
      gaps = []
      total_offset = 0

      escaped = text.gsub(/<[^>]*>|&#[0-9]+;/) do |match|
        total_offset += match.length - 1
        end_offset = Regexp.last_match.end(0)
        gaps << [end_offset - total_offset, total_offset]
        ' '
      end

      Extractor.extract_entities_with_indices(escaped, extract_url_without_protocol: false).map do |entity|
        start_pos, end_pos = entity[:indices]
        offset_idx = gaps.rindex { |gap| gap.first <= start_pos }
        offset = offset_idx.nil? ? 0 : gaps[offset_idx].last
        entity.merge(indices: [start_pos + offset, end_pos + offset])
      end
    end
  end

  private

  # Differs from `TextFormatter` in that it keeps HTML; but it sanitizes at the end to remain safe
  #
  # Yields each entity in order of appearance and splices the block's return
  # value into the text in place of the entity's original span.
  def rewrite
    entities.sort_by! do |entity|
      entity[:indices].first
    end

    result = ''.dup

    last_index = entities.reduce(0) do |index, entity|
      indices = entity[:indices]
      result << text[index...indices.first]
      result << yield(entity)
      indices.last
    end

    result << text[last_index..-1]

    Sanitize.fragment(result, Sanitize::Config::MASTODON_OUTGOING)
  end

  # Renders Markdown to HTML and strips raw CR/LF characters from the result.
  def format_markdown(html)
    html = markdown_formatter.render(html)
    html.delete("\r").delete("\n")
  end

  # Builds the Redcarpet parser; autolinked URLs are rendered with
  # `link_to_url` through the HTMLRenderer block.
  def markdown_formatter
    extensions = {
      autolink: true,
      no_intra_emphasis: true,
      fenced_code_blocks: true,
      disable_indented_code_blocks: true,
      strikethrough: true,
      lax_spacing: true,
      space_after_headers: true,
      superscript: true,
      underline: true,
      highlight: true,
      footnotes: false,
    }

    renderer = HTMLRenderer.new({
      filter_html: false,
      escape_html: false,
      no_images: true,
      no_styles: true,
      safe_links_only: true,
      hard_wrap: true,
      link_attributes: { target: '_blank', rel: 'nofollow noopener' },
    }) do |url|
      link_to_url({ url: url })
    end

    Redcarpet::Markdown.new(renderer, extensions)
  end
end
|
|
@ -33,6 +33,10 @@ class HtmlAwareFormatter
|
|||
end
|
||||
|
||||
def linkify
|
||||
TextFormatter.new(text, options).to_s
|
||||
if %w(text/markdown text/html).include?(@options[:content_type])
|
||||
AdvancedTextFormatter.new(text, options).to_s
|
||||
else
|
||||
TextFormatter.new(text, options).to_s
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -55,18 +55,6 @@ class Sanitize
|
|||
end
|
||||
end
|
||||
|
||||
LINK_REL_TRANSFORMER = lambda do |env|
|
||||
return unless env[:node_name] == 'a' and env[:node]['href']
|
||||
|
||||
node = env[:node]
|
||||
|
||||
rel = (node['rel'] || '').split(' ') & ['tag']
|
||||
unless env[:config][:outgoing] && TagManager.instance.local_url?(node['href'])
|
||||
rel += ['nofollow', 'noopener', 'noreferrer']
|
||||
end
|
||||
node['rel'] = rel.join(' ')
|
||||
end
|
||||
|
||||
UNSUPPORTED_HREF_TRANSFORMER = lambda do |env|
|
||||
return unless env[:node_name] == 'a'
|
||||
|
||||
|
@ -97,6 +85,7 @@ class Sanitize
|
|||
|
||||
add_attributes: {
|
||||
'a' => {
|
||||
'rel' => 'nofollow noopener noreferrer',
|
||||
'target' => '_blank',
|
||||
},
|
||||
},
|
||||
|
@ -110,7 +99,6 @@ class Sanitize
|
|||
CLASS_WHITELIST_TRANSFORMER,
|
||||
IMG_TAG_TRANSFORMER,
|
||||
UNSUPPORTED_HREF_TRANSFORMER,
|
||||
LINK_REL_TRANSFORMER,
|
||||
]
|
||||
)
|
||||
|
||||
|
@ -135,5 +123,48 @@ class Sanitize
|
|||
'source' => { 'src' => HTTP_PROTOCOLS }
|
||||
)
|
||||
)
|
||||
|
||||
# Sanitize transformer that normalizes the `rel` attribute of links.
#
# Only acts on `a` elements that have an `href`. Of the existing `rel` tokens
# only `tag` is kept; `nofollow noopener noreferrer` is appended unless
# `TagManager` reports the href as a local URL. When no token remains, the
# attribute is removed from the node altogether.
LINK_REL_TRANSFORMER = lambda do |env|
  return unless env[:node_name] == 'a' && env[:node]['href']

  node = env[:node]

  rel = (node['rel'] || '').split(' ') & ['tag']
  rel += ['nofollow', 'noopener', 'noreferrer'] unless TagManager.instance.local_url?(node['href'])

  if rel.empty?
    # node['rel'] is the attribute's String value, so calling #delete on it
    # would raise ArgumentError instead of dropping the attribute.
    node.remove_attribute('rel')
  else
    node['rel'] = rel.join(' ')
  end
end
|
||||
|
||||
# Sanitize transformer that normalizes the `target` attribute of links.
#
# Only acts on `a` elements that have an `href`. A link whose href
# `TagManager` reports as local and whose target is not already `_blank` has
# its `target` attribute removed; every other link gets `target="_blank"`.
LINK_TARGET_TRANSFORMER = lambda do |env|
  return unless env[:node_name] == 'a' && env[:node]['href']

  node = env[:node]
  if node['target'] != '_blank' && TagManager.instance.local_url?(node['href'])
    # node['target'] is the attribute's String value, so calling #delete on
    # it would raise ArgumentError instead of dropping the attribute.
    node.remove_attribute('target')
  else
    node['target'] = '_blank'
  end
end
|
||||
|
||||
# Sanitize configuration built on top of MASTODON_STRICT.
# - `a` elements may carry href, rel, class, title and target (merged with
#   MASTODON_STRICT's attribute list).
# - `add_attributes` is emptied, so no attribute is added unconditionally;
#   rel and target are set by LINK_REL_TRANSFORMER and LINK_TARGET_TRANSFORMER.
# - `||=` leaves an already-assigned MASTODON_OUTGOING untouched.
MASTODON_OUTGOING ||= freeze_config MASTODON_STRICT.merge(
|
||||
attributes: merge(
|
||||
MASTODON_STRICT[:attributes],
|
||||
'a' => %w(href rel class title target)
|
||||
),
|
||||
|
||||
add_attributes: {},
|
||||
|
||||
transformers: [
|
||||
CLASS_WHITELIST_TRANSFORMER,
|
||||
IMG_TAG_TRANSFORMER,
|
||||
UNSUPPORTED_HREF_TRANSFORMER,
|
||||
LINK_REL_TRANSFORMER,
|
||||
LINK_TARGET_TRANSFORMER,
|
||||
]
|
||||
)
|
||||
end
|
||||
end
|
||||
|
|
274
spec/lib/advanced_text_formatter_spec.rb
Normal file
274
spec/lib/advanced_text_formatter_spec.rb
Normal file
|
@ -0,0 +1,274 @@
|
|||
require 'rails_helper'
|
||||
|
||||
RSpec.describe AdvancedTextFormatter do
|
||||
describe '#to_s' do
|
||||
let(:preloaded_accounts) { nil }
|
||||
let(:content_type) { 'text/markdown' }
|
||||
|
||||
subject { described_class.new(text, preloaded_accounts: preloaded_accounts, content_type: content_type).to_s }
|
||||
|
||||
context 'given a markdown source' do
|
||||
let(:content_type) { 'text/markdown' }
|
||||
|
||||
context 'given text containing plain text' do
|
||||
let(:text) { 'text' }
|
||||
|
||||
it 'paragraphizes the text' do
|
||||
is_expected.to eq '<p>text</p>'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given text containing line feeds' do
|
||||
let(:text) { "line\nfeed" }
|
||||
|
||||
it 'removes line feeds' do
|
||||
is_expected.not_to include "\n"
|
||||
end
|
||||
end
|
||||
|
||||
context 'given some inline code using backticks' do
|
||||
let(:text) { 'test `foo` bar' }
|
||||
|
||||
it 'formats code using <code>' do
|
||||
is_expected.to include 'test <code>foo</code> bar'
|
||||
end
|
||||
end
|
||||
|
||||
# Markdown block quotes (`> ...`) must render as <blockquote> wrapping a paragraph.
context 'given some quote' do
  let(:text) { "> foo\n\nbar" }

  it 'formats the quote using <blockquote>' do
    is_expected.to include '<blockquote><p>foo</p></blockquote>'
  end
end
|
||||
|
||||
context 'given text containing linkable mentions' do
|
||||
let(:preloaded_accounts) { [Fabricate(:account, username: 'alice')] }
|
||||
let(:text) { '@alice' }
|
||||
|
||||
it 'creates a mention link' do
|
||||
is_expected.to include '<a href="https://cb6e6126.ngrok.io/@alice" class="u-url mention">@<span>alice</span></a></span>'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given text containing unlinkable mentions' do
|
||||
let(:preloaded_accounts) { [] }
|
||||
let(:text) { '@alice' }
|
||||
|
||||
it 'does not create a mention link' do
|
||||
is_expected.to include '@alice'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given a stand-alone medium URL' do
|
||||
let(:text) { 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4' }
|
||||
|
||||
it 'matches the full URL' do
|
||||
is_expected.to include 'href="https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4"'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given a stand-alone google URL' do
|
||||
let(:text) { 'http://google.com' }
|
||||
|
||||
it 'matches the full URL' do
|
||||
is_expected.to include 'href="http://google.com"'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given a stand-alone URL with a newer TLD' do
|
||||
let(:text) { 'http://example.gay' }
|
||||
|
||||
it 'matches the full URL' do
|
||||
is_expected.to include 'href="http://example.gay"'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given a stand-alone IDN URL' do
|
||||
let(:text) { 'https://nic.みんな/' }
|
||||
|
||||
it 'matches the full URL' do
|
||||
is_expected.to include 'href="https://nic.みんな/"'
|
||||
end
|
||||
|
||||
it 'has display URL' do
|
||||
is_expected.to include '<span class="">nic.みんな/</span>'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given a URL with a trailing period' do
|
||||
let(:text) { 'http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona. ' }
|
||||
|
||||
it 'matches the full URL but not the period' do
|
||||
is_expected.to include 'href="http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona"'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given a URL enclosed with parentheses' do
|
||||
let(:text) { '(http://google.com/)' }
|
||||
|
||||
it 'matches the full URL but not the parentheses' do
|
||||
is_expected.to include 'href="http://google.com/"'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given a URL with a trailing exclamation point' do
|
||||
let(:text) { 'http://www.google.com!' }
|
||||
|
||||
it 'matches the full URL but not the exclamation point' do
|
||||
is_expected.to include 'href="http://www.google.com"'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given a URL with a trailing single quote' do
|
||||
let(:text) { "http://www.google.com'" }
|
||||
|
||||
it 'matches the full URL but not the single quote' do
|
||||
is_expected.to include 'href="http://www.google.com"'
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context 'given a URL with a trailing angle bracket' do
|
||||
let(:text) { 'http://www.google.com>' }
|
||||
|
||||
it 'matches the full URL but not the angle bracket' do
|
||||
is_expected.to include 'href="http://www.google.com"'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given a URL with a query string' do
|
||||
context 'with escaped unicode character' do
|
||||
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink' }
|
||||
|
||||
it 'matches the full URL' do
|
||||
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink"'
|
||||
end
|
||||
end
|
||||
|
||||
context 'with unicode character' do
|
||||
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=✓&q=autolink' }
|
||||
|
||||
it 'matches the full URL' do
|
||||
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=✓&q=autolink"'
|
||||
end
|
||||
end
|
||||
|
||||
context 'with unicode character at the end' do
|
||||
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=✓' }
|
||||
|
||||
it 'matches the full URL' do
|
||||
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=✓"'
|
||||
end
|
||||
end
|
||||
|
||||
context 'with escaped and not escaped unicode characters' do
|
||||
let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&utf81=✓&q=autolink' }
|
||||
|
||||
it 'preserves escaped unicode characters' do
|
||||
is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&utf81=✓&q=autolink"'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given a URL with parentheses in it' do
|
||||
let(:text) { 'https://en.wikipedia.org/wiki/Diaspora_(software)' }
|
||||
|
||||
it 'matches the full URL' do
|
||||
is_expected.to include 'href="https://en.wikipedia.org/wiki/Diaspora_(software)"'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given a URL in quotation marks' do
|
||||
let(:text) { '"https://example.com/"' }
|
||||
|
||||
it 'does not match the quotation marks' do
|
||||
is_expected.to include 'href="https://example.com/"'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given a URL in angle brackets' do
|
||||
let(:text) { '<https://example.com/>' }
|
||||
|
||||
it 'does not match the angle brackets' do
|
||||
is_expected.to include 'href="https://example.com/"'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given a URL containing unsafe code (XSS attack, invisible part)' do
|
||||
let(:text) { %q{http://example.com/blahblahblahblah/a<script>alert("Hello")</script>} }
|
||||
|
||||
it 'does not include the HTML in the URL' do
|
||||
is_expected.to include '"http://example.com/blahblahblahblah/a"'
|
||||
end
|
||||
|
||||
it 'does not include a script tag' do
|
||||
is_expected.to_not include '<script>'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given text containing HTML code (script tag)' do
|
||||
let(:text) { '<script>alert("Hello")</script>' }
|
||||
|
||||
it 'does not include a script tag' do
|
||||
is_expected.to_not include '<script>'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given text containing HTML (XSS attack)' do
|
||||
let(:text) { %q{<img src="javascript:alert('XSS');">} }
|
||||
|
||||
it 'does not include the javascript' do
|
||||
is_expected.to_not include 'href="javascript:'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given an invalid URL' do
|
||||
let(:text) { 'http://www\.google\.com' }
|
||||
|
||||
it 'outputs the raw URL' do
|
||||
is_expected.to eq '<p>http://www\.google\.com</p>'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given text containing a hashtag' do
|
||||
let(:text) { '#hashtag' }
|
||||
|
||||
it 'creates a hashtag link' do
|
||||
is_expected.to include '/tags/hashtag" class="mention hashtag" rel="tag">#<span>hashtag</span></a>'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given text containing a hashtag with Unicode chars' do
|
||||
let(:text) { '#hashtagタグ' }
|
||||
|
||||
it 'creates a hashtag link' do
|
||||
is_expected.to include '/tags/hashtag%E3%82%BF%E3%82%B0" class="mention hashtag" rel="tag">#<span>hashtagタグ</span></a>'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given text with a stand-alone xmpp: URI' do
|
||||
let(:text) { 'xmpp:user@instance.com' }
|
||||
|
||||
it 'matches the full URI' do
|
||||
is_expected.to include 'href="xmpp:user@instance.com"'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given text with an xmpp: URI with a query-string' do
|
||||
let(:text) { 'please join xmpp:muc@instance.com?join right now' }
|
||||
|
||||
it 'matches the full URI' do
|
||||
is_expected.to include 'href="xmpp:muc@instance.com?join"'
|
||||
end
|
||||
end
|
||||
|
||||
context 'given text containing a magnet: URI' do
|
||||
let(:text) { 'wikipedia gives this example of a magnet uri: magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a' }
|
||||
|
||||
it 'matches the full URI' do
|
||||
is_expected.to include 'href="magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a"'
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -41,18 +41,8 @@ describe Sanitize::Config do
|
|||
end
|
||||
end
|
||||
|
||||
describe '::MASTODON_STRICT' do
|
||||
subject { Sanitize::Config::MASTODON_STRICT }
|
||||
|
||||
it_behaves_like 'common HTML sanitization'
|
||||
|
||||
it 'keeps a with href and rel tag' do
|
||||
expect(Sanitize.fragment('<a href="http://example.com" rel="tag">Test</a>', subject)).to eq '<a href="http://example.com" rel="tag nofollow noopener noreferrer" target="_blank">Test</a>'
|
||||
end
|
||||
end
|
||||
|
||||
describe '::MASTODON_STRICT with outgoing toots' do
|
||||
subject { Sanitize::Config::MASTODON_STRICT.merge(outgoing: true) }
|
||||
describe '::MASTODON_OUTGOING' do
|
||||
subject { Sanitize::Config::MASTODON_OUTGOING }
|
||||
|
||||
around do |example|
|
||||
original_web_domain = Rails.configuration.x.web_domain
|
||||
|
@ -62,9 +52,9 @@ describe Sanitize::Config do
|
|||
|
||||
it_behaves_like 'common HTML sanitization'
|
||||
|
||||
it 'keeps a with href and rel tag, not adding to rel if url is local' do
|
||||
it 'keeps a with href and rel tag, not adding to rel or target if url is local' do
|
||||
Rails.configuration.x.web_domain = 'domain.test'
|
||||
expect(Sanitize.fragment('<a href="http://domain.test/tags/foo" rel="tag">Test</a>', subject)).to eq '<a href="http://domain.test/tags/foo" rel="tag" target="_blank">Test</a>'
|
||||
expect(Sanitize.fragment('<a href="http://domain.test/tags/foo" rel="tag">Test</a>', subject)).to eq '<a href="http://domain.test/tags/foo" rel="tag">Test</a>'
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue