diff --git a/lib/asciinema/asciicasts.ex b/lib/asciinema/asciicasts.ex index 628e598..23977d6 100644 --- a/lib/asciinema/asciicasts.ex +++ b/lib/asciinema/asciicasts.ex @@ -1,6 +1,6 @@ defmodule Asciinema.Asciicasts do import Ecto.Query, warn: false - alias Asciinema.{Repo, Asciicast, FileStore} + alias Asciinema.{Repo, Asciicast, FileStore, StringUtils} alias Asciinema.Asciicasts.PosterGenerator def get_asciicast!(id) when is_integer(id) do @@ -128,7 +128,8 @@ defmodule Asciinema.Asciicasts do defp open_stream_files(stdout_timing_path, stdout_data_path) do {open_stream_file(stdout_timing_path), - open_stream_file(stdout_data_path)} + open_stream_file(stdout_data_path), + ""} end defp open_stream_file(path) do @@ -146,13 +147,14 @@ defmodule Asciinema.Asciicasts do end end - defp generate_stream_elem({timing_file, data_file} = files) do + defp generate_stream_elem({timing_file, data_file, invalid_str} = files) do case IO.read(timing_file, :line) do line when is_binary(line) -> {delay, count} = parse_line(line) - case IO.read(data_file, count) do + case IO.binread(data_file, count) do text when is_binary(text) -> - {[{delay, text}], files} + {valid_str, invalid_str} = StringUtils.valid_part(invalid_str, text) + {[{delay, valid_str}], {timing_file, data_file, invalid_str}} otherwise -> {:error, otherwise} end @@ -161,7 +163,7 @@ defmodule Asciinema.Asciicasts do end end - defp close_stream_files({timing_file, data_file}) do + defp close_stream_files({timing_file, data_file, _}) do File.close(timing_file) File.close(data_file) end diff --git a/lib/asciinema/string_utils.ex b/lib/asciinema/string_utils.ex new file mode 100644 index 0000000..a456c49 --- /dev/null +++ b/lib/asciinema/string_utils.ex @@ -0,0 +1,22 @@ +defmodule Asciinema.StringUtils do + def valid_part(invalid_str, str) do + case String.chunk(invalid_str <> str, :valid) do + [] -> + {"", ""} + chunks -> + str = + chunks + |> Enum.take(Enum.count(chunks) - 1) + |> Enum.filter(&String.valid?/1) + |> Enum.join + + last = Enum.at(chunks, -1) + + if String.valid?(last) do + {str <> last, ""} + else + {str, last} + end + end + end +end diff --git a/spec/fixtures/0.9.8/stdout-split b/spec/fixtures/0.9.8/stdout-split new file mode 100644 index 0000000..29ee40a Binary files /dev/null and b/spec/fixtures/0.9.8/stdout-split differ diff --git a/spec/fixtures/0.9.8/stdout-split.time b/spec/fixtures/0.9.8/stdout-split.time new file mode 100644 index 0000000..01c8c3e Binary files /dev/null and b/spec/fixtures/0.9.8/stdout-split.time differ diff --git a/test/asciinema/asciicasts_test.exs b/test/asciinema/asciicasts_test.exs index 276557c..c2ff994 100644 --- a/test/asciinema/asciicasts_test.exs +++ b/test/asciinema/asciicasts_test.exs @@ -131,5 +131,12 @@ defmodule Asciinema.AsciicastsTest do assert :ok == Stream.run(stream) assert [{1.234567, "foobar"}, {0.123456, "baz"}] == Enum.take(stream, 2) end + + test "with bzipped files (utf-8 sequence split between frames)" do + stream = Asciicasts.stdout_stream("spec/fixtures/0.9.8/stdout-split.time", + "spec/fixtures/0.9.8/stdout-split") + assert :ok == Stream.run(stream) + assert [{1.234567, "xxżó"}, {0.123456, "łć"}, {2.0, "xx"}] == Enum.take(stream, 3) + end end end diff --git a/test/asciinema/string_utils_test.exs b/test/asciinema/string_utils_test.exs new file mode 100644 index 0000000..7e45e66 --- /dev/null +++ b/test/asciinema/string_utils_test.exs @@ -0,0 +1,27 @@ +defmodule Asciinema.StringUtilsTest do + use ExUnit.Case + + describe "valid_part/2" do + import Asciinema.StringUtils, only: [valid_part: 2] + + test "no accumulator, valid string" do + assert valid_part("", "foo") == {"foo", ""} + end + + test "no accumulator, partial utf-8 seq" do + assert valid_part("", <<0xc5>>) == {"", <<0xc5>>} + end + + test "no accumulator, valid string + partial utf-8 seq at the end" do + assert valid_part("", "foo" <> <<0xc5>>) == {"foo", <<0xc5>>} + end + + test "with accumulator, rest of utf-8 seq + valid string at the end" do + assert valid_part(<<0xc5>>, <<0x82>> <> "ćfoo") == {"łćfoo", ""} + end + + test "with accumulator, mixed valid/invalid string + partial utf-8 seq at the end" do + assert valid_part(<<0xc5>>, "x" <> <<0xc5, 0xc5>> <> "y" <> <<0xc5>>) == {"xy", <<0xc5>>} + end + end +end