diff --git a/lib/pleroma/object/containment.ex b/lib/pleroma/object/containment.ex index 37bc20e4d..7b1cc37bd 100644 --- a/lib/pleroma/object/containment.ex +++ b/lib/pleroma/object/containment.ex @@ -12,8 +12,6 @@ defmodule Pleroma.Object.Containment do spoofing, therefore removal of object containment functions is NOT recommended. """ - alias Pleroma.Web.ActivityPub.Transmogrifier - def get_actor(%{"actor" => actor}) when is_binary(actor) do actor end @@ -50,16 +48,39 @@ def get_object(_) do defp compare_uris(%URI{host: host} = _id_uri, %URI{host: host} = _other_uri), do: :ok defp compare_uris(_id_uri, _other_uri), do: :error - defp compare_uris_exact(uri, uri), do: :ok + defp uri_strip_slash(%URI{path: path} = uri) when is_binary(path), + do: %{uri | path: String.replace_suffix(path, "/", "")} - defp compare_uris_exact(%URI{} = id, %URI{} = other), - do: compare_uris_exact(URI.to_string(id), URI.to_string(other)) + defp uri_strip_slash(uri), do: uri - defp compare_uris_exact(id_uri, other_uri) - when is_binary(id_uri) and is_binary(other_uri) do - norm_id = String.replace_suffix(id_uri, "/", "") - norm_other = String.replace_suffix(other_uri, "/", "") - if norm_id == norm_other, do: :ok, else: :error + # domain names are case-insensitive per spec (other parts of URIs aren’t necessarily) + defp uri_normalise_host(%URI{host: host} = uri) when is_binary(host), + do: %{uri | host: String.downcase(host, :ascii)} + + defp uri_normalise_host(uri), do: uri + + defp compare_uri_identities(uri, uri), do: :ok + + defp compare_uri_identities(id_uri, other_uri) when is_binary(id_uri) and is_binary(other_uri), + do: compare_uri_identities(URI.parse(id_uri), URI.parse(other_uri)) + + defp compare_uri_identities(%URI{} = id, %URI{} = other) do + normid = + %{id | fragment: nil} + |> uri_strip_slash() + |> uri_normalise_host() + + normother = + %{other | fragment: nil} + |> uri_strip_slash() + |> uri_normalise_host() + + # Conversion back to binary avoids issues from non-normalised deprecated authority field + if URI.to_string(normid) == URI.to_string(normother) do + :ok + else + :error + end end @doc """ @@ -93,21 +114,13 @@ def contain_origin(id, %{"attributedTo" => actor} = params), def contain_origin(_id, _data), do: :ok @doc """ - Check whether the fetch URL (after redirects) exactly (sans tralining slash) matches either - the canonical ActivityPub id or the objects url field (for display URLs from *key and Mastodon) + Check whether the fetch URL (after redirects) is the + same location the canonical ActivityPub id points to. Since this is meant to be used for fetches, anonymous or transient objects are not accepted here. """ - def contain_id_to_fetch(url, %{"id" => id} = data) when is_binary(id) do - with {:id, :error} <- {:id, compare_uris_exact(id, url)}, - # "url" can be a "Link" object and this is checked before full normalisation - display_url <- Transmogrifier.fix_url(data)["url"], - true <- display_url != nil do - compare_uris_exact(display_url, url) - else - {:id, :ok} -> :ok - _ -> :error - end + def contain_id_to_fetch(url, %{"id" => id}) when is_binary(id) do + compare_uri_identities(url, id) end def contain_id_to_fetch(_url, _data), do: :error diff --git a/lib/pleroma/object/fetcher.ex b/lib/pleroma/object/fetcher.ex index 937026e04..2cecfec67 100644 --- a/lib/pleroma/object/fetcher.ex +++ b/lib/pleroma/object/fetcher.ex @@ -116,7 +116,7 @@ defp reinject_object(%Object{} = object, new_data) do @doc "Assumes object already is in our database and refetches from remote to update (e.g. for polls)" def refetch_object(%Object{data: %{"id" => id}} = object) do with {:local, false} <- {:local, Object.local?(object)}, - {:ok, new_data} <- fetch_and_contain_remote_object_from_id(id), + {:ok, new_data} <- fetch_and_contain_remote_object_from_id(id, true), {:id, true} <- {:id, new_data["id"] == id}, {:ok, object} <- reinject_object(object, new_data) do {:ok, object} @@ -253,14 +253,17 @@ defp maybe_date_fetch(headers, date) do end end - @doc "Fetches arbitrary remote object and performs basic safety and authenticity checks" - def fetch_and_contain_remote_object_from_id(id) + @doc """ + Fetches arbitrary remote object and performs basic safety and authenticity checks. + When the fetch URL is known to already be a canonical AP id, checks are stricter. + """ + def fetch_and_contain_remote_object_from_id(id, is_ap_id \\ false) - def fetch_and_contain_remote_object_from_id(%{"id" => id}), - do: fetch_and_contain_remote_object_from_id(id) + def fetch_and_contain_remote_object_from_id(%{"id" => id}, is_ap_id), + do: fetch_and_contain_remote_object_from_id(id, is_ap_id) - def fetch_and_contain_remote_object_from_id(id) when is_binary(id) do - Logger.debug("Fetching object #{id} via AP") + def fetch_and_contain_remote_object_from_id(id, is_ap_id) when is_binary(id) do + Logger.debug("Fetching object #{id} via AP [ap_id=#{is_ap_id}]") with {:valid_uri_scheme, true} <- {:valid_uri_scheme, String.starts_with?(id, "http")}, %URI{} = uri <- URI.parse(id), @@ -270,18 +273,31 @@ def fetch_and_contain_remote_object_from_id(id) when is_binary(id) do {:mrf_accept_check, Pleroma.Web.ActivityPub.MRF.SimplePolicy.check_accept(uri)}, {:local_fetch, :ok} <- {:local_fetch, Containment.contain_local_fetch(id)}, {:ok, final_id, body} <- get_object(id), + # a canonical ID shouldn't be a redirect + true <- !is_ap_id || final_id == id, {:ok, data} <- safe_json_decode(body), - {_, :ok} <- {:strict_id, Containment.contain_id_to_fetch(final_id, data)}, - {_, :ok} <- {:containment, Containment.contain_origin(final_id, data)} do + {_, :ok} <- {:containment, Containment.contain_origin(final_id, data)}, + {_, _, :ok} <- {:strict_id, data["id"], Containment.contain_id_to_fetch(final_id, data)} do unless Instances.reachable?(final_id) do Instances.set_reachable(final_id) end {:ok, data} else - {:strict_id, _} = e -> - log_fetch_error(id, e) - {:error, :id_mismatch} + # E.g. Mastodon and *key serve the AP object directly under their display URLs without + # redirecting to their canonical location first, thus ids will expectedly differ. + # Similarly keys, either use a fragment ID and are a subobjects or a distinct ID + # but for compatibility are still a subobject presenting their owning actors ID at the toplevel. + # Refetching _once_ from the listed id, should yield a strict match afterwards. + {:strict_id, ap_id, _} = e -> + case is_ap_id do + false -> + fetch_and_contain_remote_object_from_id(ap_id, true) + + true -> + log_fetch_error(id, e) + {:error, :id_mismatch} + end {:mrf_reject_check, _} = e -> log_fetch_error(id, e) @@ -301,7 +317,7 @@ def fetch_and_contain_remote_object_from_id(id) when is_binary(id) do {:containment, reason} -> log_fetch_error(id, reason) - {:error, reason} + {:error, {:containment, reason}} {:error, e} -> {:error, e} @@ -311,25 +327,13 @@ def fetch_and_contain_remote_object_from_id(id) when is_binary(id) do end end - def fetch_and_contain_remote_object_from_id(_id), + def fetch_and_contain_remote_object_from_id(_id, _is_ap_id), do: {:error, :invalid_id} - defp check_crossdomain_redirect(final_host, original_url) - # HOPEFULLY TEMPORARY # Basically none of our Tesla mocks in tests set the (supposed to # exist for Tesla proper) url parameter for their responses # causing almost every fetch in test to fail otherwise - if @mix_env == :test do - defp check_crossdomain_redirect(nil, _) do - {:cross_domain_redirect, false} - end - end - - defp check_crossdomain_redirect(final_host, original_url) do - {:cross_domain_redirect, final_host != URI.parse(original_url).host} - end - if @mix_env == :test do defp get_final_id(nil, initial_url), do: initial_url defp get_final_id("", initial_url), do: initial_url @@ -355,10 +359,6 @@ def get_object(id) do with {:ok, %{body: body, status: code, headers: headers, url: final_url}} when code in 200..299 <- HTTP.Backoff.get(id, headers), - remote_host <- - URI.parse(final_url).host, - {:cross_domain_redirect, false} <- - check_crossdomain_redirect(remote_host, id), {:has_content_type, {_, content_type}} <- {:has_content_type, List.keyfind(headers, "content-type", 0)}, {:parse_content_type, {:ok, "application", subtype, type_params}} <- diff --git a/test/pleroma/object/containment_test.exs b/test/pleroma/object/containment_test.exs index f8f40a3ac..1a1d01473 100644 --- a/test/pleroma/object/containment_test.exs +++ b/test/pleroma/object/containment_test.exs @@ -9,7 +9,6 @@ defmodule Pleroma.Object.ContainmentTest do alias Pleroma.User import Pleroma.Factory - import ExUnit.CaptureLog setup_all do Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) @@ -136,23 +135,17 @@ test "contain_id_to_fetch() allows matching IDs" do ) end - test "contain_id_to_fetch() allows display URLs" do + test "contain_id_to_fetch() allows fragments and normalises domain casing" do data = %{ - "id" => "http://example.com/~alyssa/activities/1234.json", - "url" => "http://example.com/@alyssa/status/1234" + "id" => "http://example.com/users/capybara", + "url" => "http://example.com/@capybara" } - :ok = - Containment.contain_id_to_fetch( - "http://example.com/@alyssa/status/1234", - data - ) - - :ok = - Containment.contain_id_to_fetch( - "http://example.com/@alyssa/status/1234/", - data - ) + assert :ok == + Containment.contain_id_to_fetch( + "http://EXAMPLE.com/users/capybara#key", + data + ) end test "users cannot be collided through fake direction spoofing attempts" do @@ -164,10 +157,14 @@ test "users cannot be collided through fake direction spoofing attempts" do follower_address: User.ap_followers(%User{nickname: "rye@niu.moe"}) }) - assert capture_log(fn -> - {:error, _} = User.get_or_fetch_by_ap_id("https://n1u.moe/users/rye") - end) =~ - "[error] Could not decode user at fetch https://n1u.moe/users/rye" + # Fetch from an attempted spoof id will suceed, but automatically retrieve + # the real data from the homeserver instead of naïvely using the spoof + {:ok, fetched_user} = User.get_or_fetch_by_ap_id("https://n1u.moe/users/rye") + + refute fetched_user.name == "evil rye" + refute fetched_user.raw_bio == "boooo!" + assert fetched_user.name == "♡ rye ♡" + assert fetched_user.nickname == "rye@niu.moe" end test "contain_origin_from_id() gracefully handles cases where no ID is present" do diff --git a/test/pleroma/object/fetcher_test.exs b/test/pleroma/object/fetcher_test.exs index 12154cb05..84bf0aa05 100644 --- a/test/pleroma/object/fetcher_test.exs +++ b/test/pleroma/object/fetcher_test.exs @@ -22,6 +22,7 @@ defp spoofed_object_with_ids( |> Jason.decode!() |> Map.put("id", id) |> Map.put("actor", actor_id) + |> Map.put("attributedTo", actor_id) |> Jason.encode!() end @@ -109,7 +110,7 @@ defp spoofed_object_with_ids( body: spoofed_object_with_ids("https://patch.cx/objects/spoof_media_redirect1") } - # Spoof: cross-domain redirect with final domain id + # Spoof: cross-domain redirect with final domain id, but original id actor %{method: :get, url: "https://patch.cx/objects/spoof_media_redirect2"} -> %Tesla.Env{ status: 200, @@ -118,6 +119,19 @@ defp spoofed_object_with_ids( body: spoofed_object_with_ids("https://media.patch.cx/objects/spoof_media_redirect2") } + # No-Spoof: cross-domain redirect with id and actor from final domain + %{method: :get, url: "https://patch.cx/objects/spoof_media_redirect3"} -> + %Tesla.Env{ + status: 200, + url: "https://media.patch.cx/objects/spoof_media_redirect3", + headers: [{"content-type", "application/activity+json"}], + body: + spoofed_object_with_ids( + "https://media.patch.cx/objects/spoof_media_redirect3", + "https://media.patch.cx/users/rin" + ) + } + # No-Spoof: same domain redirect %{method: :get, url: "https://patch.cx/objects/spoof_redirect"} -> %Tesla.Env{ @@ -252,7 +266,7 @@ test "it does not fetch a spoofed object with wrong content type" do end test "it does not fetch a spoofed object with id different from URL" do - assert {:error, :id_mismatch} = + assert {:error, :not_found} = Fetcher.fetch_and_contain_remote_object_from_id( "https://patch.cx/media/03ca3c8b4ac3ddd08bf0f84be7885f2f88de0f709112131a22d83650819e36c2.json" ) @@ -264,19 +278,29 @@ test "it does not fetch a spoofed object with id different from URL" do end test "it does not fetch an object via cross-domain redirects (initial id)" do - assert {:error, {:cross_domain_redirect, true}} = + assert {:error, {:containment, _}} = Fetcher.fetch_and_contain_remote_object_from_id( "https://patch.cx/objects/spoof_media_redirect1" ) end - test "it does not fetch an object via cross-domain redirects (final id)" do - assert {:error, {:cross_domain_redirect, true}} = + test "it does not fetch an object via cross-domain redirect if the actor is from the original domain" do + assert {:error, {:containment, :error}} = Fetcher.fetch_and_contain_remote_object_from_id( "https://patch.cx/objects/spoof_media_redirect2" ) end + test "it allows cross-domain redirects when id and author are from final domain" do + assert {:ok, %{"id" => id, "attributedTo" => author}} = + Fetcher.fetch_and_contain_remote_object_from_id( + "https://patch.cx/objects/spoof_media_redirect3" + ) + + assert URI.parse(id).host == "media.patch.cx" + assert URI.parse(author).host == "media.patch.cx" + end + test "it accepts same-domain redirects" do assert {:ok, %{"id" => id} = _object} = Fetcher.fetch_and_contain_remote_object_from_id( diff --git a/test/support/http_request_mock.ex b/test/support/http_request_mock.ex index 6a01393e3..ea06c4966 100644 --- a/test/support/http_request_mock.ex +++ b/test/support/http_request_mock.ex @@ -263,7 +263,12 @@ def get("https://n1u.moe/users/rye", _, _, @activitypub_accept_headers) do {:ok, %Tesla.Env{ status: 200, - body: File.read!("test/fixtures/tesla_mock/rye.json"), + body: + File.read!("test/fixtures/tesla_mock/rye.json") + |> Jason.decode!() + |> Map.put("name", "evil rye") + |> Map.put("bio", "boooo!") + |> Jason.encode!(), headers: activitypub_object_headers() }} end