From: joey Date: Thu, 15 Jun 2006 06:37:33 +0000 (+0000) Subject: * -CSD does not affect modules, so readfile() was not using the utf-8 input X-Git-Url: https://sipb.mit.edu/gitweb.cgi/ikiwiki.git/commitdiff_plain/202c4d522f9f83d0e439b3846fb6a90e4eebee59?ds=sidebyside * -CSD does not affect modules, so readfile() was not using the utf-8 input layer, which led to lots of problems; make it force read files as utf-8. Closes: #373203 * writefile() likewise needs to use the utf8 output layer. * Remove the -CSD from ikiwiki's hashbang since it's useless to have it there. * Revert some of the decode_utf8 changes in CGI.pm that seem unnecessary given the readfile fix. * Add utf-8 testcases for readfile and htmlize. --- diff --git a/IkiWiki.pm b/IkiWiki.pm index 357c1cd2d..76472302e 100644 --- a/IkiWiki.pm +++ b/IkiWiki.pm @@ -163,7 +163,12 @@ sub readfile ($;$) { #{{{ local $/=undef; open (IN, $file) || error("failed to read $file: $!"); - binmode(IN) if $binary; + if (! $binary) { + binmode(IN, ":utf8"); + } + else { + binmode(IN); + } my $ret=; close IN; return $ret; @@ -195,7 +200,12 @@ sub writefile ($$$;$) { #{{{ } open (OUT, ">$destdir/$file") || error("failed to write $destdir/$file: $!"); - binmode(OUT) if $binary; + if (! $binary) { + binmode(OUT, ":utf8"); + } + else { + binmode(OUT); + } print OUT $content; close OUT; } #}}} diff --git a/IkiWiki/CGI.pm b/IkiWiki/CGI.pm index 4f4c48876..0d763caa3 100644 --- a/IkiWiki/CGI.pm +++ b/IkiWiki/CGI.pm @@ -43,6 +43,7 @@ sub cgi_recentchanges ($) { #{{{ styleurl => styleurl(), baseurl => "$config{url}/", ); + # XXX why is this needed? If it's raw utf-8 won't print DTRT? require Encode; print $q->header(-charset=>'utf-8'), Encode::decode_utf8($template->output); } #}}} @@ -353,12 +354,14 @@ sub cgi_editpage ($$) { #{{{ } elsif ($form->submitted eq "Preview") { require IkiWiki::Render; + # Apparently FormBuilder doesn't not treat input as + # utf-8, so decode from it. 
require Encode; my $content = Encode::decode_utf8($form->field('editcontent')); $form->field(name => "editcontent", value => $content, force => 1); $form->tmpl_param("page_preview", - Encode::decode_utf8(htmlize($config{default_pageext}, - linkify($page, $page, $content)))); + htmlize($config{default_pageext}, + linkify($page, $page, $content))); } else { $form->tmpl_param("page_preview", ""); @@ -421,8 +424,7 @@ sub cgi_editpage ($$) { #{{{ ! length $form->field('editcontent')) { my $content=""; if (exists $pagesources{lc($page)}) { - require Encode; - $content=Encode::decode_utf8(readfile(srcfile($pagesources{lc($page)}))); + $content=readfile(srcfile($pagesources{lc($page)})); $content=~s/\n/\r\n/g; } $form->field(name => "editcontent", value => $content, @@ -453,6 +455,7 @@ sub cgi_editpage ($$) { #{{{ } if (defined $form->field('comments') && length $form->field('comments')) { + # Decode utf-8 since FormBuilder does not. require Encode; $message.=Encode::decode_utf8(": ".$form->field('comments')); } diff --git a/debian/changelog b/debian/changelog index 28d423ba6..e5778c0bb 100644 --- a/debian/changelog +++ b/debian/changelog @@ -2,8 +2,17 @@ ikiwiki (1.6) UNRELEASED; urgency=low * YA utf-8 patch from Recai, this time to fix previewing a page so that the text in the input box is re-encoded back to utf-8. - - -- Joey Hess Wed, 14 Jun 2006 23:38:40 -0400 + * -CSD does not affect modules, so readfile() was not using the utf-8 input + layer, which led to lots of problems; make it force read files as utf-8. + Closes: #373203 + * writefile() likewise needs to use the utf8 output layer. + * Remove the -CSD from ikiwiki's hashbang since it's useless to have it + there. + * Revert some of the decode_utf8 changes in CGI.pm that seem unnecessary + given the readfile fix. + * Add utf-8 testcases for readfile and htmlize. 
+ + -- Joey Hess Thu, 15 Jun 2006 01:46:03 -0400 ikiwiki (1.5) unstable; urgency=low diff --git a/ikiwiki b/ikiwiki index 61b66374d..e6c2567ba 100755 --- a/ikiwiki +++ b/ikiwiki @@ -1,4 +1,4 @@ -#!/usr/bin/perl -T -CSD +#!/usr/bin/perl -T $ENV{PATH}="/usr/local/bin:/usr/bin:/bin"; package IkiWiki; diff --git a/t/htmlize.t b/t/htmlize.t new file mode 100755 index 000000000..465004bc1 --- /dev/null +++ b/t/htmlize.t @@ -0,0 +1,19 @@ +#!/usr/bin/perl +use warnings; +use strict; +use Test::More tests => 4; +use Encode; + +BEGIN { use_ok("IkiWiki"); } +BEGIN { use_ok("IkiWiki::Render"); } + +# Initialize htmlscrubber plugin +%IkiWiki::config=IkiWiki::defaultconfig(); +$IkiWiki::config{srcdir}=$IkiWiki::config{destdir}="/dev/null"; +IkiWiki::checkconfig(); + +is(IkiWiki::htmlize(".mdwn", "foo\n\nbar\n"), "

<p>foo</p>\n\n<p>bar</p>

\n", + "basic"); +is(IkiWiki::htmlize(".mdwn", IkiWiki::readfile("t/test1.mdwn")), + Encode::decode_utf8(qq{

<p><img src="../images/o.jpg" alt="o" title="&oacute;" />\nóóóóó</p>

\n}), + "utf8; bug #373203"); diff --git a/t/readfile.t b/t/readfile.t new file mode 100755 index 000000000..5332f4c35 --- /dev/null +++ b/t/readfile.t @@ -0,0 +1,12 @@ +#!/usr/bin/perl +use warnings; +use strict; +use Test::More tests => 3; +use Encode; + +BEGIN { use_ok("IkiWiki"); } + +# should read files as utf8 +ok(Encode::is_utf8(IkiWiki::readfile("t/test1.mdwn"), 1)); +is(IkiWiki::readfile("t/test1.mdwn"), + Encode::decode_utf8('![o](../images/o.jpg "ó")'."\n".'óóóóó'."\n")); diff --git a/t/test1.mdwn b/t/test1.mdwn new file mode 100644 index 000000000..f4ebc2c08 --- /dev/null +++ b/t/test1.mdwn @@ -0,0 +1,2 @@ +![o](../images/o.jpg "ó") +óóóóó