properly escape all unprintable characters

Writing messages should now be in line with the gettext tools. I tested
using msgcat, and it produces the same results.

For some common use-cases I wrote explicit tests, for uncommon and
even unwanted use-cases I wrote one test to make sure pofile works
like msgcat for those messages
This commit is contained in:
Julian Bäume 2014-06-23 18:24:02 +02:00
parent 4cfebdee80
commit e1742e66a6
3 changed files with 76 additions and 6 deletions

@ -236,8 +236,27 @@ PO.Item.prototype.toString = function () {
// reverse what extract(string) method during PO.parse does
var _escape = function (string) {
string = string.replace(/\\/g, '\\\\');
return string.replace(/"/g, '\\"');
// don't unescape \n, since string can never contain it
// since split('\n') is called on it
string = string.replace(/[\x07\b\t\v\f\r"\\]/g, function (match) {
switch (match) {
case '\x07':
return '\\a';
case '\b':
return '\\b';
case '\t':
return '\\t';
case '\v':
return '\\v';
case '\f':
return '\\f';
case '\r':
return '\\r';
default:
return '\\' + match;
}
});
return string;
};
var _process = function (keyword, text, i) {
@ -286,6 +305,7 @@ PO.Item.prototype.toString = function () {
} else {
text = isArray(text) ? text.join() : text;
var processed = _process(keyword, text);
//handle \n in single-line texts (can not be handled in _escape)
for (var i = 1; i < processed.length - 1; i++) {
processed[i] = processed[i].slice(0, -1) + '\\n"';
}

@ -17,3 +17,43 @@ msgstr ""
msgid "The name field must not contain characters like \" or \\"
msgstr ""
# possibility to reorder items depending on locale
#. Format of addresses
#. %1$s is the street
#. %2$s is the postal code
#. %3$s is the city
#. %4$s is the state
#. %5$s is the country
msgid ""
"%1$s\n"
"%2$s %3$s\n"
"%4$s\n"
"%5$s"
msgstr ""
# "i18"ned code
#. used in <pre> environment, so don't remove any control sequences
msgid ""
"define('some/test/module', function () {\n"
"\t'use strict';\n"
"\treturn {};\n"
"});\n"
""
msgstr ""
"define('random/test/file', function () {\n"
"\t'use strict';\n"
"\treturn {};\n"
"});\n"
""
# all one-letter escape characters
# be aware, that \a, \b, \v, \f and \r should not be used
# in i18ned messages (according to gettext tools)
# however, they should be properly parsed, anyway
msgid ""
"\a\b\t\n"
"\v\f\r"
msgstr ""
"\a\b\t\n"
"\v\f\r"

@ -133,19 +133,29 @@ describe('Parse', function () {
});
describe('C-Strings', function () {
var po = PO.parse(fs.readFileSync(__dirname + '/fixtures/c-strings.po', 'utf8'));
it('should parse the c-strings.po file', function () {
var po = PO.parse(fs.readFileSync(__dirname + '/fixtures/c-strings.po', 'utf8'));
assert.notEqual(po, null);
});
it('should extract strings containing " and \\ characters', function () {
var po = PO.parse(fs.readFileSync(__dirname + '/fixtures/c-strings.po', 'utf8'));
var items = po.items.filter(function (item) {
return (/^The name field must not contain/).test(item.msgid);
});
assert.equal(items[0].msgid, 'The name field must not contain characters like " or \\');
});
it('should handle \n characters', function () {
var item = po.items[1];
assert.equal(item.msgid, '%1$s\n%2$s %3$s\n%4$s\n%5$s');
});
it('should handle \t characters', function () {
var item = po.items[2];
assert.equal(item.msgid, 'define(\'some/test/module\', function () {\n' +
'\t\'use strict\';\n' +
'\treturn {};\n' +
'});\n');
});
});
});