Fix utf-8 handling in stdout_stream/2

ex-snapshot
Marcin Kulik 7 years ago
parent 3fb1018803
commit c99e2ccaff

@ -1,6 +1,6 @@
defmodule Asciinema.Asciicasts do
import Ecto.Query, warn: false
alias Asciinema.{Repo, Asciicast, FileStore}
alias Asciinema.{Repo, Asciicast, FileStore, StringUtils}
alias Asciinema.Asciicasts.PosterGenerator
def get_asciicast!(id) when is_integer(id) do
@ -128,7 +128,8 @@ defmodule Asciinema.Asciicasts do
defp open_stream_files(stdout_timing_path, stdout_data_path) do
{open_stream_file(stdout_timing_path),
open_stream_file(stdout_data_path)}
open_stream_file(stdout_data_path),
""}
end
defp open_stream_file(path) do
@ -146,13 +147,14 @@ defmodule Asciinema.Asciicasts do
end
end
defp generate_stream_elem({timing_file, data_file} = files) do
defp generate_stream_elem({timing_file, data_file, invalid_str} = files) do
case IO.read(timing_file, :line) do
line when is_binary(line) ->
{delay, count} = parse_line(line)
case IO.read(data_file, count) do
case IO.binread(data_file, count) do
text when is_binary(text) ->
{[{delay, text}], files}
{valid_str, invalid_str} = StringUtils.valid_part(invalid_str, text)
{[{delay, valid_str}], {timing_file, data_file, invalid_str}}
otherwise ->
{:error, otherwise}
end
@ -161,7 +163,7 @@ defmodule Asciinema.Asciicasts do
end
end
defp close_stream_files({timing_file, data_file}) do
defp close_stream_files({timing_file, data_file, _}) do
File.close(timing_file)
File.close(data_file)
end

@ -0,0 +1,22 @@
defmodule Asciinema.StringUtils do
  @moduledoc """
  Helpers for binaries that may contain invalid or incomplete UTF-8.
  """

  @doc """
  Extracts the valid UTF-8 text from `invalid_str <> str`.

  `invalid_str` is the leftover returned by a previous call (use `""` when
  there is none) and `str` is the newly read data. The concatenation is split
  into runs of valid and invalid bytes with `String.chunk/2`.

  Returns `{valid, leftover}`:

    * `valid` is every valid run joined together. Invalid runs that are not
      the last run are dropped.
    * `leftover` is the last run when that run is invalid, so the caller can
      prepend it to the next piece of data. Otherwise it is `""`.

  ## Examples

      valid_part("", "foo" <> <<0xC5>>)
      #=> {"foo", <<0xC5>>}

      valid_part(<<0xC5>>, <<0x82>> <> "ćfoo")
      #=> {"łćfoo", ""}

  """
  def valid_part(invalid_str, str) do
    runs = String.chunk(invalid_str <> str, :valid)

    # Separate the final run from everything before it in a single pass.
    case Enum.split(runs, -1) do
      {_, []} ->
        # No runs at all: the combined input was empty.
        {"", ""}

      {leading, [tail]} ->
        kept =
          leading
          |> Enum.filter(&String.valid?/1)
          |> Enum.join()

        # Only a trailing invalid run can be the start of a sequence that the
        # next read completes, so it is handed back instead of being dropped.
        if String.valid?(tail) do
          {kept <> tail, ""}
        else
          {kept, tail}
        end
    end
  end
end

Binary file not shown.

Binary file not shown.

@ -131,5 +131,12 @@ defmodule Asciinema.AsciicastsTest do
assert :ok == Stream.run(stream)
assert [{1.234567, "foobar"}, {0.123456, "baz"}] == Enum.take(stream, 2)
end
test "with bzipped files (utf-8 sequence split between frames)" do
stream = Asciicasts.stdout_stream("spec/fixtures/0.9.8/stdout-split.time",
"spec/fixtures/0.9.8/stdout-split")
assert :ok == Stream.run(stream)
assert [{1.234567, "xxżó"}, {0.123456, "łć"}, {2.0, "xx"}] == Enum.take(stream, 3)
end
end
end

@ -0,0 +1,27 @@
defmodule Asciinema.StringUtilsTest do
  use ExUnit.Case

  alias Asciinema.StringUtils

  # 0xC5 on its own is an incomplete UTF-8 sequence; followed by 0x82 it forms
  # "ł" (see the "rest of utf-8 seq" case below).
  describe "valid_part/2" do
    test "no accumulator, valid string" do
      assert {"foo", ""} == StringUtils.valid_part("", "foo")
    end

    test "no accumulator, partial utf-8 seq" do
      assert {"", <<0xC5>>} == StringUtils.valid_part("", <<0xC5>>)
    end

    test "no accumulator, valid string + partial utf-8 seq at the end" do
      assert {"foo", <<0xC5>>} == StringUtils.valid_part("", <<"foo", 0xC5>>)
    end

    test "with accumulator, rest of utf-8 seq + valid string at the end" do
      assert {"łćfoo", ""} == StringUtils.valid_part(<<0xC5>>, <<0x82, "ćfoo"::utf8>>)
    end

    test "with accumulator, mixed valid/invalid string + partial utf-8 seq at the end" do
      # Invalid bytes in the middle are dropped; only the trailing one is kept.
      input = <<"x", 0xC5, 0xC5, "y", 0xC5>>

      assert {"xy", <<0xC5>>} == StringUtils.valid_part(<<0xC5>>, input)
    end
  end
end
Loading…
Cancel
Save