From d8fc514359fdd4469c7e3dd735e9ac984fbda529 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20B=C3=A4ume?= Date: Mon, 23 Jun 2014 12:52:10 +0200 Subject: [PATCH 1/3] don't remove \n characters from written po file in Item.toString, all \n characters are removed from the output. The gettext tools however leave those characters intact. This will now produce the same output as tools like msgcat. --- lib/po.js | 3 +++ test/write.js | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/po.js b/lib/po.js index e22db97..2bfc3e6 100644 --- a/lib/po.js +++ b/lib/po.js @@ -262,6 +262,9 @@ PO.Item.prototype.toString = function () { } else { text = isArray(text) ? text.join() : text; var processed = _process(keyword, text); + for (var i = 1; i < processed.length - 1; i++) { + processed[i] = processed[i].slice(0, -1) + '\\n"'; + } lines = lines.concat(mkObsolete + processed.join('\n' + mkObsolete)); } } diff --git a/test/write.js b/test/write.js index dd319ad..06edb97 100644 --- a/test/write.js +++ b/test/write.js @@ -97,7 +97,7 @@ describe('Write', function () { item.msgid = '\n should be written escaped'; assertHasLine(item.toString(), 'msgid ""'); - assertHasLine(item.toString(), '""'); + assertHasLine(item.toString(), '"\\n"'); assertHasLine(item.toString(), '" should be written escaped"'); }); From 4cfebdee80d80d806c6542ba962076893c358ee8 Mon Sep 17 00:00:00 2001 From: Candid Dauth Date: Sun, 23 Sep 2012 01:39:44 +0200 Subject: [PATCH 2/3] Fixed unescaping of all escaped C String characters. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Julian Bäume Conflicts: lib/po.js --- lib/po.js | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/lib/po.js b/lib/po.js index 2bfc3e6..a337063 100644 --- a/lib/po.js +++ b/lib/po.js @@ -119,8 +119,32 @@ PO.parse = function (data) { function extract(string) { string = trim(string); string = string.replace(/^[^"]*"|"$/g, ''); - string = string.replace(/\\"/g, '"'); - string = string.replace(/\\\\/g, '\\'); + string = string.replace(/\\([abtnvfr'"\\?]|([0-7]{3})|x([0-9a-fA-F]{2}))/g, function (match, esc, oct, hex) { + if (oct) { + return String.fromCharCode(parseInt(oct, 8)); + } + if (hex) { + return String.fromCharCode(parseInt(hex, 16)); + } + switch (esc) { + case 'a': + return '\x07'; + case 'b': + return '\b'; + case 't': + return '\t'; + case 'n': + return '\n'; + case 'v': + return '\v'; + case 'f': + return '\f'; + case 'r': + return '\r'; + default: + return esc; + } + }); return string; } From e1742e66a6b0f09eab51356821ca6308e5ebc68b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20B=C3=A4ume?= Date: Mon, 23 Jun 2014 18:24:02 +0200 Subject: [PATCH 3/3] properly escape all unprintable characters writing messages should now be in line with gettext tools. I tested using msgcat, it provides the same results. 
For some common use-cases I wrote explicit tests, for uncommon and even unwanted use-cases I wrote one test to make sure pofile works like msgcat for those messages --- lib/po.js | 24 +++++++++++++++++++++-- test/fixtures/c-strings.po | 40 ++++++++++++++++++++++++++++++++++++++ test/parse.js | 18 +++++++++++++---- 3 files changed, 76 insertions(+), 6 deletions(-) diff --git a/lib/po.js b/lib/po.js index a337063..b7a6866 100644 --- a/lib/po.js +++ b/lib/po.js @@ -236,8 +236,27 @@ PO.Item.prototype.toString = function () { // reverse what extract(string) method during PO.parse does var _escape = function (string) { - string = string.replace(/\\/g, '\\\\'); - return string.replace(/"/g, '\\"'); + // don't escape \n, since string can never contain it + // since split('\n') is called on it + string = string.replace(/[\x07\b\t\v\f\r"\\]/g, function (match) { + switch (match) { + case '\x07': + return '\\a'; + case '\b': + return '\\b'; + case '\t': + return '\\t'; + case '\v': + return '\\v'; + case '\f': + return '\\f'; + case '\r': + return '\\r'; + default: + return '\\' + match; + } + }); + return string; }; var _process = function (keyword, text, i) { @@ -286,6 +305,7 @@ PO.Item.prototype.toString = function () { } else { text = isArray(text) ? text.join() : text; var processed = _process(keyword, text); + //handle \n in single-line texts (can not be handled in _escape) for (var i = 1; i < processed.length - 1; i++) { processed[i] = processed[i].slice(0, -1) + '\\n"'; } diff --git a/test/fixtures/c-strings.po b/test/fixtures/c-strings.po index 597951e..d05b8e3 100644 --- a/test/fixtures/c-strings.po +++ b/test/fixtures/c-strings.po @@ -17,3 +17,43 @@ msgstr "" msgid "The name field must not contain characters like \" or \\" msgstr "" + +# possibility to reorder items depending on locale +#. Format of addresses +#. %1$s is the street +#. %2$s is the postal code +#. %3$s is the city +#. %4$s is the state +#. 
%5$s is the country +msgid "" +"%1$s\n" +"%2$s %3$s\n" +"%4$s\n" +"%5$s" +msgstr "" + +# "i18"ned code +#. used in
 environment, so don't remove any control sequences
+msgid ""
+"define('some/test/module', function () {\n"
+"\t'use strict';\n"
+"\treturn {};\n"
+"});\n"
+""
+msgstr ""
+"define('random/test/file', function () {\n"
+"\t'use strict';\n"
+"\treturn {};\n"
+"});\n"
+""
+
+# all one-letter escape characters
+# be aware, that \a, \b, \v, \f and \r should not be used
+# in i18ned messages (according to gettext tools)
+# however, they should be properly parsed, anyway
+msgid ""
+"\a\b\t\n"
+"\v\f\r"
+msgstr ""
+"\a\b\t\n"
+"\v\f\r"
diff --git a/test/parse.js b/test/parse.js
index d12ee10..f17c7af 100644
--- a/test/parse.js
+++ b/test/parse.js
@@ -133,19 +133,29 @@ describe('Parse', function () {
     });
 
     describe('C-Strings', function () {
+        var po = PO.parse(fs.readFileSync(__dirname + '/fixtures/c-strings.po', 'utf8')); // parsed once, shared by the tests below
         it('should parse the c-strings.po file', function () {
-            var po = PO.parse(fs.readFileSync(__dirname + '/fixtures/c-strings.po', 'utf8'));
-
             assert.notEqual(po, null);
         });
 
         it('should extract strings containing " and \\ characters', function () {
-            var po = PO.parse(fs.readFileSync(__dirname + '/fixtures/c-strings.po', 'utf8'));
-
             var items = po.items.filter(function (item) {
                 return (/^The name field must not contain/).test(item.msgid);
             });
             assert.equal(items[0].msgid, 'The name field must not contain characters like " or \\');
         });
+
+        it('should handle \\n characters', function () { // title escaped so reporters print a literal \n
+            var item = po.items[1];
+            assert.equal(item.msgid, '%1$s\n%2$s %3$s\n%4$s\n%5$s');
+        });
+
+        it('should handle \\t characters', function () { // title escaped so reporters print a literal \t
+            var item = po.items[2];
+            assert.equal(item.msgid, 'define(\'some/test/module\', function () {\n' +
+                '\t\'use strict\';\n' +
+                '\treturn {};\n' +
+                '});\n');
+        });
     });
 });