* -CSD does not affect modules, so readfile() was not using the utf-8 input
author joey <joey@0fa5a96a-9a0e-0410-b3b2-a0fd24251071>
Thu, 15 Jun 2006 06:37:33 +0000 (06:37 +0000)
committer joey <joey@0fa5a96a-9a0e-0410-b3b2-a0fd24251071>
Thu, 15 Jun 2006 06:37:33 +0000 (06:37 +0000)
  layer, which led to lots of problems; make it force read files as utf-8.
Closes: #373203
* writefile() likewise needs to use the utf8 output layer.
* Remove the -CSD from ikiwiki's hashbang since it's useless to have it
  there.
* Revert some of the decode_utf8 changes in CGI.pm that seem unnecessary
  given the readfile fix.
* Add utf-8 testcases for readfile and htmlize.

IkiWiki.pm
IkiWiki/CGI.pm
debian/changelog
ikiwiki
t/htmlize.t [new file with mode: 0755]
t/readfile.t [new file with mode: 0755]
t/test1.mdwn [new file with mode: 0644]

index 357c1cd2d0ed0567538b41d585f7483d86238847..76472302e12a9bce6c3989e90969399184a9cafd 100644 (file)
@@ -163,7 +163,12 @@ sub readfile ($;$) { #{{{
        
        local $/=undef;
        open (IN, $file) || error("failed to read $file: $!");
-       binmode(IN) if $binary;
+       if (! $binary) {
+               binmode(IN, ":utf8");
+       }
+       else {
+               binmode(IN);
+       }
        my $ret=<IN>;
        close IN;
        return $ret;
@@ -195,7 +200,12 @@ sub writefile ($$$;$) { #{{{
        }
        
        open (OUT, ">$destdir/$file") || error("failed to write $destdir/$file: $!");
-       binmode(OUT) if $binary;
+       if (! $binary) {
+               binmode(OUT, ":utf8");
+       }
+       else {
+               binmode(OUT);
+       }
        print OUT $content;
        close OUT;
 } #}}}
index 4f4c488762ee477245cb3ad79dc39ae449fdac59..0d763caa38ec942e540a1b36b5e579c08841c054 100644 (file)
@@ -43,6 +43,7 @@ sub cgi_recentchanges ($) { #{{{
                styleurl => styleurl(),
                baseurl => "$config{url}/",
        );
+       # XXX why is this needed? If it's raw utf-8 won't print DTRT?
        require Encode;
        print $q->header(-charset=>'utf-8'), Encode::decode_utf8($template->output);
 } #}}}
@@ -353,12 +354,14 @@ sub cgi_editpage ($$) { #{{{
        }
        elsif ($form->submitted eq "Preview") {
                require IkiWiki::Render;
+               # Apparently FormBuilder does not treat input as
+               # utf-8, so decode from it.
                require Encode;
                my $content = Encode::decode_utf8($form->field('editcontent'));
                $form->field(name => "editcontent", value => $content, force => 1);
                $form->tmpl_param("page_preview",
-                       Encode::decode_utf8(htmlize($config{default_pageext},
-                               linkify($page, $page, $content))));
+                       htmlize($config{default_pageext},
+                               linkify($page, $page, $content)));
        }
        else {
                $form->tmpl_param("page_preview", "");
@@ -421,8 +424,7 @@ sub cgi_editpage ($$) { #{{{
                            ! length $form->field('editcontent')) {
                                my $content="";
                                if (exists $pagesources{lc($page)}) {
-                                       require Encode;
-                                       $content=Encode::decode_utf8(readfile(srcfile($pagesources{lc($page)})));
+                                       $content=readfile(srcfile($pagesources{lc($page)}));
                                        $content=~s/\n/\r\n/g;
                                }
                                $form->field(name => "editcontent", value => $content,
@@ -453,6 +455,7 @@ sub cgi_editpage ($$) { #{{{
                }
                if (defined $form->field('comments') &&
                    length $form->field('comments')) {
+                       # Decode utf-8 since FormBuilder does not.
                        require Encode;
                        $message.=Encode::decode_utf8(": ".$form->field('comments'));
                }
index 28d423ba6c47a13578d2e3ef1375dfa55d8aef1c..e5778c0bb7463f75b89cc97d2fc152b3261b1dd2 100644 (file)
@@ -2,8 +2,17 @@ ikiwiki (1.6) UNRELEASED; urgency=low
 
   * YA utf-8 patch from Recai, this time to fix previewing a page so that
     the text in the input box is re-encoded back to utf-8.
-
- -- Joey Hess <joeyh@debian.org>  Wed, 14 Jun 2006 23:38:40 -0400
+  * -CSD does not affect modules, so readfile() was not using the utf-8 input
+    layer, which led to lots of problems; make it force read files as utf-8.
+    Closes: #373203
+  * writefile() likewise needs to use the utf8 output layer.
+  * Remove the -CSD from ikiwiki's hashbang since it's useless to have it
+    there.
+  * Revert some of the decode_utf8 changes in CGI.pm that seem unnecessary
+    given the readfile fix.
+  * Add utf-8 testcases for readfile and htmlize.
+
+ -- Joey Hess <joeyh@debian.org>  Thu, 15 Jun 2006 01:46:03 -0400
 
 ikiwiki (1.5) unstable; urgency=low
 
diff --git a/ikiwiki b/ikiwiki
index 61b66374de58924d081218292289c777306e8e0d..e6c2567baa42ce08e417b3fb2bd2c61b23172999 100755 (executable)
--- a/ikiwiki
+++ b/ikiwiki
@@ -1,4 +1,4 @@
-#!/usr/bin/perl -T -CSD
+#!/usr/bin/perl -T
 $ENV{PATH}="/usr/local/bin:/usr/bin:/bin";
 
 package IkiWiki;
diff --git a/t/htmlize.t b/t/htmlize.t
new file mode 100755 (executable)
index 0000000..465004b
--- /dev/null
@@ -0,0 +1,19 @@
+#!/usr/bin/perl
+use warnings;
+use strict;
+use Test::More tests => 4;
+use Encode;
+
+BEGIN { use_ok("IkiWiki"); }
+BEGIN { use_ok("IkiWiki::Render"); }
+
+# Initialize htmlscrubber plugin
+%IkiWiki::config=IkiWiki::defaultconfig();
+$IkiWiki::config{srcdir}=$IkiWiki::config{destdir}="/dev/null";
+IkiWiki::checkconfig();
+
+is(IkiWiki::htmlize(".mdwn", "foo\n\nbar\n"), "<p>foo</p>\n\n<p>bar</p>\n",
+       "basic");
+is(IkiWiki::htmlize(".mdwn", IkiWiki::readfile("t/test1.mdwn")),
+       Encode::decode_utf8(qq{<p><img src="../images/o.jpg" alt="o" title="&oacute;" />\nóóóóó</p>\n}),
+       "utf8; bug #373203");
diff --git a/t/readfile.t b/t/readfile.t
new file mode 100755 (executable)
index 0000000..5332f4c
--- /dev/null
@@ -0,0 +1,12 @@
+#!/usr/bin/perl
+use warnings;
+use strict;
+use Test::More tests => 3;
+use Encode;
+
+BEGIN { use_ok("IkiWiki"); }
+
+# should read files as utf8
+ok(Encode::is_utf8(IkiWiki::readfile("t/test1.mdwn"), 1));
+is(IkiWiki::readfile("t/test1.mdwn"),
+       Encode::decode_utf8('![o](../images/o.jpg "ó")'."\n".'óóóóó'."\n"));
diff --git a/t/test1.mdwn b/t/test1.mdwn
new file mode 100644 (file)
index 0000000..f4ebc2c
--- /dev/null
@@ -0,0 +1,2 @@
+![o](../images/o.jpg "ó")
+óóóóó