From 46120b16b62e1930920e815cbcba30761d232c27 Mon Sep 17 00:00:00 2001 From: Thomas Citharel Date: Mon, 3 May 2021 12:36:46 +0200 Subject: [PATCH] Fix merging URIs for media from url when doing a rich media preview Signed-off-by: Thomas Citharel --- lib/service/rich_media/favicon.ex | 16 ++++---------- lib/service/rich_media/parser.ex | 36 +++++++++++++++++-------------- 2 files changed, 24 insertions(+), 28 deletions(-) diff --git a/lib/service/rich_media/favicon.ex b/lib/service/rich_media/favicon.ex index ddbbb2691..bdb146cf9 100644 --- a/lib/service/rich_media/favicon.ex +++ b/lib/service/rich_media/favicon.ex @@ -56,20 +56,12 @@ defmodule Mobilizon.Service.RichMedia.Favicon do @spec format_url(String.t(), String.t()) :: String.t() defp format_url(url, path) do - image_uri = URI.parse(path) - uri = URI.parse(url) - - cond do - is_nil(image_uri.host) -> "#{uri.scheme}://#{uri.host}#{correct_path(path)}" - is_nil(image_uri.scheme) -> "#{uri.scheme}:#{path}" - true -> path - end + url + |> URI.parse() + |> URI.merge(path) + |> to_string() end - # Sometimes paths have "/" in front, sometimes not - defp correct_path("/" <> _ = path), do: path - defp correct_path(path), do: "/#{path}" - @spec find_favicon_link_tag(String.t()) :: {:ok, tuple()} | {:error, any()} defp find_favicon_link_tag(html) do with {:ok, html} <- Floki.parse_document(html), diff --git a/lib/service/rich_media/parser.ex b/lib/service/rich_media/parser.ex index 1d45b53d5..879741482 100644 --- a/lib/service/rich_media/parser.ex +++ b/lib/service/rich_media/parser.ex @@ -19,6 +19,7 @@ defmodule Mobilizon.Service.RichMedia.Parser do alias Mobilizon.Config alias Mobilizon.Service.HTTP.RichMediaPreviewClient alias Mobilizon.Service.RichMedia.Favicon + alias Mobilizon.Service.RichMedia.Parsers.Fallback alias Plug.Conn.Utils require Logger @@ -211,9 +212,17 @@ defmodule Mobilizon.Service.RichMedia.Parser do data end - defp check_parsed_data(data) do - Logger.debug("Found metadata was invalid or incomplete: #{inspect(data)}") - {:error, :invalid_parsed_data} + defp check_parsed_data(data, html, first_run) do + # Maybe the first data found is incomplete, pass it through the Fallback parser once again + if first_run do + {:ok, data} = Fallback.parse(html, data) + Logger.debug("check parsed data") + Logger.debug(inspect(data)) + check_parsed_data(data, html, false) + else + Logger.debug("Found metadata was invalid or incomplete: #{inspect(data)}") + {:error, :invalid_parsed_data} + end end defp clean_parsed_data(data) do @@ -280,25 +289,20 @@ defmodule Mobilizon.Service.RichMedia.Parser do @spec check_remote_picture_path(map()) :: map() defp check_remote_picture_path(%{image_remote_url: image_remote_url, url: url} = data) do Logger.debug("Checking image_remote_url #{image_remote_url}") - image_uri = URI.parse(image_remote_url) - uri = URI.parse(url) - image_remote_url = - cond do - is_nil(image_uri.host) -> "#{uri.scheme}://#{uri.host}#{correct_path(image_remote_url)}" - is_nil(image_uri.scheme) -> "#{uri.scheme}:#{image_remote_url}" - true -> image_remote_url - end - - data = Map.put(data, :image_remote_url, image_remote_url) + data = Map.put(data, :image_remote_url, format_url(url, image_remote_url)) {:ok, data} end defp check_remote_picture_path(data), do: {:ok, data} - # Sometimes paths have "/" in front, sometimes not - defp correct_path("/" <> _ = path), do: path - defp correct_path(path), do: "/#{path}" + @spec format_url(String.t(), String.t()) :: String.t() + defp format_url(url, path) do + url + |> URI.parse() + |> URI.merge(path) + |> to_string() + end # Twitter requires a well-know crawler user-agent to show server-rendered data defp default_user_agent("https://twitter.com/" <> _) do