module Patron::ResponseDecoding

Contains methods used for decoding the HTTP response body. These are only ever used internally by the Response class.

Constants

CHARSET_CONTENT_TYPE_RE
INTERNAL_CHARSET_MISMATCH_ERROR
INVALID_CHARSET_NAME_ERROR
MISREPORTED_ENCODING_ERROR

Private Instance Methods

charset_from_content_type() click to toggle source
# File lib/patron/response_decoding.rb, line 63
# Looks up the charset declared in the Content-Type response header.
#
# The header value is stringified first, so a missing header is matched
# as an empty string. Returns the first capture group of
# CHARSET_CONTENT_TYPE_RE when the header matches, nil otherwise.
def charset_from_content_type
  match = CHARSET_CONTENT_TYPE_RE.match(@headers["Content-Type"].to_s)
  match && match[1]
end
convert_encoding_and_raise(str) click to toggle source
# File lib/patron/response_decoding.rb, line 54
# Transcodes +str+ into the internal encoding (see internal_encoding) and
# returns the transcoded copy.
#
# Raises NonRepresentableBody, with a message built from
# INTERNAL_CHARSET_MISMATCH_ERROR, when the string contains characters that
# have no representation in the internal encoding.
def convert_encoding_and_raise(str)
  target = internal_encoding
  str.encode(target)
rescue Encoding::UndefinedConversionError
  # Binary strings are reported as 'binary' rather than by the encoding's own name.
  source_name =
    if str.encoding == Encoding::BINARY
      'binary'
    else
      str.encoding.to_s
    end
  message = INTERNAL_CHARSET_MISMATCH_ERROR % {source_encoding: source_name, target_encoding: target}
  raise NonRepresentableBody, message
end
decode_body(strict) click to toggle source
# File lib/patron/response_decoding.rb, line 37
# Decodes @body into the internal encoding.
#
# The encoding is taken from encoding_from_headers_or_binary and @body is
# re-tagged with it in place. If the bytes are not valid in that encoding,
# HeaderCharsetInvalid is raised with MISREPORTED_ENCODING_ERROR.
#
# strict - when truthy, conversion is delegated to convert_encoding_and_raise;
#          otherwise characters that cannot be represented in the internal
#          encoding are replaced with '?'.
#
# Returns the converted string.
def decode_body(strict)
  declared = encoding_from_headers_or_binary

  # force_encoding mutates @body; valid_encoding? then checks whether the
  # bytes really are in the declared encoding.
  unless @body.force_encoding(declared).valid_encoding?
    raise HeaderCharsetInvalid, MISREPORTED_ENCODING_ERROR % {declared: declared}
  end

  return convert_encoding_and_raise(@body) if strict

  @body.encode(internal_encoding, undefined: :replace, replace: '?')
end
decode_header_data(str) click to toggle source
# File lib/patron/response_decoding.rb, line 85
# Best-effort transcoding of raw header data.
#
# Header data is tricky. Strictly speaking, it _must_ be ISO-encoded. However,
# Content-Disposition sometimes gets sent as raw UTF8 - and most browsers
# (except for localized IE versions on Windows) treat it as such. So a
# fallback chain of 8859-1->UTF8->binary seems the most sane.
#
# str - the header string to transcode.
#
# Returns the result of the first successful String#encode call in the chain,
# or +str+ itself when none succeeds. Conversion failures never propagate to
# the caller.
def decode_header_data(str)
  [Encoding::ISO8859_1, Encoding::UTF_8, Encoding::BINARY].each do |candidate|
    begin
      return str.encode(candidate)
    rescue ::Encoding::UndefinedConversionError,
           ::Encoding::InvalidByteSequenceError,
           ::Encoding::ConverterNotFoundError
      # A string whose bytes are invalid for its tagged encoding raises
      # InvalidByteSequenceError rather than UndefinedConversionError; treat
      # every conversion failure the same way and try the next candidate.
      next
    end
  end
  str # if it doesn't encode, just give back what we got
end
encoding_from_headers_or_binary() click to toggle source
# File lib/patron/response_decoding.rb, line 67
# Resolves the Encoding object for the charset declared in the headers.
#
# Returns Encoding::BINARY when charset_from_content_type yields nothing,
# otherwise the result of Encoding.find for the declared name.
#
# Raises HeaderCharsetInvalid, with INVALID_CHARSET_NAME_ERROR and the
# inspected Content-Type header, when the lookup raises ArgumentError.
def encoding_from_headers_or_binary
  declared_charset = charset_from_content_type
  return Encoding::BINARY unless declared_charset

  Encoding.find(declared_charset)
rescue ArgumentError # invalid charset name
  offending_header = @headers['Content-Type'].inspect
  raise HeaderCharsetInvalid, INVALID_CHARSET_NAME_ERROR % {content_type: offending_header}
end
internal_encoding() click to toggle source
# File lib/patron/response_decoding.rb, line 75
# Returns the encoding that decoded strings should end up in.
#
# Encoding.default_internal is used when it is set. It is frequently nil,
# though, so the fallback is a small trick: take the encoding of an empty
# string literal. For example, this holds true on 2.1+ on OSX:
#
#     Encoding.default_internal #=> nil
#     ''.encoding #=> #<Encoding:UTF-8>
def internal_encoding
  configured = Encoding.default_internal
  return configured if configured

  ''.encoding
end