From cb796ccd0919e5a698e76091753efa8e464527ee Mon Sep 17 00:00:00 2001 From: aszlig Date: Thu, 26 May 2016 14:14:07 +0200 Subject: [PATCH] nixos/test-driver/Logger: Replace invalid UTF-8 Regression introduced by d84741a4bfb6ed2531ec7154479c147d2c9a737c. The mentioned commit actually is a good thing, because we now get the output from the X session. Unfortunately, for the i3wm test, the i3-config-wizard prints out the raw keyboard symbols directly coming from xcb, so the output isn't necessarily proper UTF-8. As the XML::Writer already expects valid UTF-8 input, we assume that everything that comes into sanitise() will be UTF-8 from the start. So we just decode() it using FB_DEFAULT as the check argument so that every invalid character is replaced by the Unicode replacement character: https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character We simply re-encode it afterwards and return it, so we should always get out valid UTF-8 in the log XML. For more information about FB_DEFAULT and FB_CROAK, have a look at: http://search.cpan.org/~dankogai/Encode-2.84/Encode.pm#Handling_Malformed_Data Signed-off-by: aszlig --- nixos/lib/test-driver/Logger.pm | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nixos/lib/test-driver/Logger.pm b/nixos/lib/test-driver/Logger.pm index 6e62fdfd7708..3fe5ef67c144 100644 --- a/nixos/lib/test-driver/Logger.pm +++ b/nixos/lib/test-driver/Logger.pm @@ -3,6 +3,7 @@ package Logger; use strict; use Thread::Queue; use XML::Writer; +use Encode qw(decode encode); sub new { my ($class) = @_; @@ -56,7 +57,8 @@ sub nest { sub sanitise { my ($s) = @_; $s =~ s/[[:cntrl:]\xff]//g; - return $s; + $s = decode('UTF-8', $s, Encode::FB_DEFAULT); + return encode('UTF-8', $s, Encode::FB_CROAK); } sub log {