diff --git a/lib/gollum.rb b/lib/gollum.rb index 412fcbb8..78b0010a 100644 --- a/lib/gollum.rb +++ b/lib/gollum.rb @@ -18,57 +18,13 @@ require 'gollum/page' require 'gollum/file' require 'gollum/markup' require 'gollum/albino' +require 'gollum/sanitization' module Gollum VERSION = '1.1.0' - SANITIZATION_OPTIONS = { - :elements => [ - 'a', 'abbr', 'acronym', 'address', 'area', 'b', 'big', - 'blockquote', 'br', 'button', 'caption', 'center', 'cite', - 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', - 'div', 'dl', 'dt', 'em', 'fieldset', 'font', 'form', 'h1', - 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', - 'ins', 'kbd', 'label', 'legend', 'li', 'map', 'menu', - 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp', - 'select', 'small', 'span', 'strike', 'strong', 'sub', - 'sup', 'table', 'tbody', 'td', 'textarea', 'tfoot', 'th', - 'thead', 'tr', 'tt', 'u', 'ul', 'var' - ], - :attributes => { - 'a' => ['href'], - 'img' => ['src'], - :all => ['abbr', 'accept', 'accept-charset', - 'accesskey', 'action', 'align', 'alt', 'axis', - 'border', 'cellpadding', 'cellspacing', 'char', - 'charoff', 'charset', 'checked', 'cite', - 'clear', 'cols', 'colspan', 'color', - 'compact', 'coords', 'datetime', 'dir', - 'disabled', 'enctype', 'for', 'frame', - 'headers', 'height', 'hreflang', - 'hspace', 'ismap', 'label', 'lang', - 'longdesc', 'maxlength', 'media', 'method', - 'multiple', 'name', 'nohref', 'noshade', - 'nowrap', 'prompt', 'readonly', 'rel', 'rev', - 'rows', 'rowspan', 'rules', 'scope', - 'selected', 'shape', 'size', 'span', - 'start', 'summary', 'tabindex', 'target', - 'title', 'type', 'usemap', 'valign', 'value', - 'vspace', 'width'] - }, - :protocols => { - 'a' => {'href' => ['http', 'https', 'mailto', :relative]}, - 'img' => {'src' => ['http', 'https', :relative]} - } - } - - HISTORY_SANITIZATION_OPTIONS = SANITIZATION_OPTIONS.merge( - :add_attributes => { - 'a' => {'rel' => 'nofollow'} - } - ) - class Error < StandardError; end + class 
DuplicatePageError < Error attr_accessor :dir attr_accessor :existing_path diff --git a/lib/gollum/markup.rb b/lib/gollum/markup.rb index 53073ce0..1ed9be0d 100644 --- a/lib/gollum/markup.rb +++ b/lib/gollum/markup.rb @@ -27,9 +27,10 @@ module Gollum # # Returns the formatted String content. def render(no_follow = false) - sanitize_options = no_follow ? - HISTORY_SANITIZATION_OPTIONS : - SANITIZATION_OPTIONS + sanitize_options = no_follow ? + @wiki.history_sanitization : + @wiki.sanitization + data = extract_tex(@data) data = extract_code(data) data = extract_tags(data) @@ -43,7 +44,7 @@ module Gollum end data = process_tags(data) data = process_code(data) - data = Sanitize.clean(data, sanitize_options) + data = Sanitize.clean(data, sanitize_options.to_hash) if sanitize_options data = process_tex(data) data.gsub!(/

<\/p>/, '') data diff --git a/lib/gollum/sanitization.rb b/lib/gollum/sanitization.rb new file mode 100644 index 00000000..72ba2ec9 --- /dev/null +++ b/lib/gollum/sanitization.rb @@ -0,0 +1,109 @@ +module Gollum + + # Encapsulate sanitization options. + # + # This class does not yet support all options of Sanitize library. + # See http://github.com/rgrove/sanitize/. + class Sanitization + # Default whitelisted elements. + ELEMENTS = [ + 'a', 'abbr', 'acronym', 'address', 'area', 'b', 'big', + 'blockquote', 'br', 'button', 'caption', 'center', 'cite', + 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', + 'div', 'dl', 'dt', 'em', 'fieldset', 'font', 'form', 'h1', + 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', + 'ins', 'kbd', 'label', 'legend', 'li', 'map', 'menu', + 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp', + 'select', 'small', 'span', 'strike', 'strong', 'sub', + 'sup', 'table', 'tbody', 'td', 'textarea', 'tfoot', 'th', + 'thead', 'tr', 'tt', 'u', 'ul', 'var' + ].freeze + + # Default whitelisted attributes. + ATTRIBUTES = { + 'a' => ['href'], + 'img' => ['src'], + :all => ['abbr', 'accept', 'accept-charset', + 'accesskey', 'action', 'align', 'alt', 'axis', + 'border', 'cellpadding', 'cellspacing', 'char', + 'charoff', 'class', 'charset', 'checked', 'cite', + 'clear', 'cols', 'colspan', 'color', + 'compact', 'coords', 'datetime', 'dir', + 'disabled', 'enctype', 'for', 'frame', + 'headers', 'height', 'hreflang', + 'hspace', 'ismap', 'label', 'lang', + 'longdesc', 'maxlength', 'media', 'method', + 'multiple', 'name', 'nohref', 'noshade', + 'nowrap', 'prompt', 'readonly', 'rel', 'rev', + 'rows', 'rowspan', 'rules', 'scope', + 'selected', 'shape', 'size', 'span', + 'start', 'summary', 'tabindex', 'target', + 'title', 'type', 'usemap', 'valign', 'value', + 'vspace', 'width'] + }.freeze + + # Default whitelisted protocols for URLs. 
+ PROTOCOLS = { + 'a' => {'href' => ['http', 'https', 'mailto', :relative]}, + 'img' => {'src' => ['http', 'https', :relative]} + }.freeze + + # Gets an Array of whitelisted HTML elements. Default: ELEMENTS. + attr_reader :elements + + # Gets a Hash describing which attributes are allowed in which HTML + # elements. Default: ATTRIBUTES. + attr_reader :attributes + + # Gets a Hash describing which URI protocols are allowed in HTML + # attributes. Default: PROTOCOLS + attr_reader :protocols + + # Gets a Hash describing HTML attributes that Sanitize should add. + # Default: {} + attr_reader :add_attributes + + # Sets a boolean determining whether Sanitize allows HTML comments in the + # output. Default: false. + attr_writer :allow_comments + + def initialize + @elements = ELEMENTS + @attributes = ATTRIBUTES + @protocols = PROTOCOLS + @add_attributes = {} + @allow_comments = false + yield self if block_given? + end + + # Determines if Sanitize should allow HTML comments. + # + # Returns True if comments are allowed, or False. + def allow_comments? + !!@allow_comments + end + + # Builds a new Sanitization instance for older revisions of pages that + # adds rel="nofollow" to links. The receiver itself is not modified. + # + # Returns a Sanitization instance. + def history_sanitization + self.class.new do |sanitize| + sanitize.add_attributes['a'] = {'rel' => 'nofollow'} + end + end + + # Builds a Hash of options suitable for Sanitize.clean. + # + # Returns a Hash. + def to_hash + { :elements => elements, + :attributes => attributes, + :protocols => protocols, + :add_attributes => add_attributes, + :allow_comments => allow_comments? + } + end + end +end + diff --git a/lib/gollum/wiki.rb b/lib/gollum/wiki.rb index 30fade8a..6afd74da 100644 --- a/lib/gollum/wiki.rb +++ b/lib/gollum/wiki.rb @@ -15,6 +15,14 @@ module Gollum # Sets the default email for commits. attr_accessor :default_committer_email + # Sets sanitization options. Set to false to deactivate + # sanitization altogether. 
+ attr_writer :sanitization + + # Sets sanitization options for older page revisions. Set to false to + # deactivate sanitization altogether. + attr_writer :history_sanitization + # Gets the page class used by all instances of this Wiki. # Default: Gollum::Page. def page_class @@ -36,6 +44,26 @@ module Gollum ::Gollum::File end end + + # Gets the default sanitization options for current pages used by + # instances of this Wiki. + def sanitization + if @sanitization.nil? + @sanitization = Sanitization.new + end + @sanitization + end + + # Gets the default sanitization options for older page revisions used by + # instances of this Wiki. + def history_sanitization + if @history_sanitization.nil? + @history_sanitization = sanitization ? + sanitization.history_sanitization : + false + end + @history_sanitization + end end self.default_committer_name = 'Anonymous' @@ -46,23 +74,34 @@ module Gollum # to "/". attr_reader :base_path + # Gets the sanitization options for current pages used by this Wiki. + attr_reader :sanitization + + # Gets the sanitization options for older page revisions used by this Wiki. + attr_reader :history_sanitization + # Public: Initialize a new Gollum Repo. # # repo - The String path to the Git repository that holds the Gollum # site. # options - Optional Hash: - # :base_path - String base path for all Wiki links. - # Default: "/" - # :page_class - The page Class. Default: Gollum::Page - # :file_class - The file Class. Default: Gollum::File + # :base_path - String base path for all Wiki links. + # Default: "/" + # :page_class - The page Class. Default: Gollum::Page + # :file_class - The file Class. Default: Gollum::File + # :sanitization - An instance of Sanitization. # # Returns a fresh Gollum::Repo. 
def initialize(path, options = {}) - @path = path - @repo = Grit::Repo.new(path) - @base_path = options[:base_path] || "/" - @page_class = options[:page_class] || self.class.page_class - @file_class = options[:file_class] || self.class.file_class + @path = path + @repo = Grit::Repo.new(path) + @base_path = options[:base_path] || "/" + @page_class = options[:page_class] || self.class.page_class + @file_class = options[:file_class] || self.class.file_class + @sanitization = options[:sanitization] || self.class.sanitization + @history_sanitization = options[:history_sanitization] || + self.class.history_sanitization + clear_cache end