/** * Copyright 2009 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS-IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ const Changeset = require('ep_etherpad-lite/static/js/Changeset'); const padManager = require('../db/PadManager'); const _ = require('underscore'); const Security = require('ep_etherpad-lite/static/js/security'); const hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); const eejs = require('ep_etherpad-lite/node/eejs'); const _analyzeLine = require('./ExportHelper')._analyzeLine; const _encodeWhitespace = require('./ExportHelper')._encodeWhitespace; const padutils = require('../../static/js/pad_utils').padutils; async function getPadHTML(pad, revNum) { let atext = pad.atext; // fetch revision atext if (revNum != undefined) { atext = await pad.getInternalRevisionAText(revNum); } // convert atext to html return await getHTMLFromAtext(pad, atext); } exports.getPadHTML = getPadHTML; exports.getHTMLFromAtext = getHTMLFromAtext; async function getHTMLFromAtext(pad, atext, authorColors) { const apool = pad.apool(); const textLines = atext.text.slice(0, -1).split('\n'); const attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text); const tags = ['h1', 'h2', 'strong', 'em', 'u', 's']; const props = ['heading1', 'heading2', 'bold', 'italic', 'underline', 'strikethrough']; await Promise.all([ // prepare tags stored as ['tag', true] to be exported hooks.aCallAll('exportHtmlAdditionalTags', pad).then((newProps) => { newProps.forEach((prop) => { tags.push(prop); props.push(prop); }); }), // prepare tags stored as ['tag', 'value'] to be exported. This will generate HTML with tags // like hooks.aCallAll('exportHtmlAdditionalTagsWithData', pad).then((newProps) => { newProps.forEach((prop) => { tags.push(`span data-${prop[0]}="${prop[1]}"`); props.push(prop); }); }), ]); // holds a map of used styling attributes (*1, *2, etc) in the apool // and maps them to an index in props // *3:2 -> the attribute *3 means strong // *2:5 -> the attribute *2 means s(trikethrough) const anumMap = {}; let css = ''; const stripDotFromAuthorID = function (id) { return id.replace(/\./g, '_'); }; if (authorColors) { css += ''; } // iterates over all props(h1,h2,strong,...), checks if it is used in // this pad, and if yes puts its attrib id->props value into anumMap props.forEach((propName, i) => { let attrib = [propName, true]; if (_.isArray(propName)) { // propName can be in the form of ['color', 'red'], // see hook exportHtmlAdditionalTagsWithData attrib = propName; } const propTrueNum = apool.putAttrib(attrib, true); if (propTrueNum >= 0) { anumMap[propTrueNum] = i; } }); function getLineHTML(text, attribs) { // Use order of tags (b/i/u) as order of nesting, for simplicity // and decent nesting. For example, // Just bold Bold and italics Just italics // becomes // Just bold Bold and italics Just italics const taker = Changeset.stringIterator(text); const assem = Changeset.stringAssembler(); const openTags = []; function getSpanClassFor(i) { // return if author colors are disabled if (!authorColors) return false; const property = props[i]; // we are not insterested on properties in the form of ['color', 'red'], // see hook exportHtmlAdditionalTagsWithData if (_.isArray(property)) { return false; } if (property.substr(0, 6) === 'author') { return stripDotFromAuthorID(property); } if (property === 'removed') { return 'removed'; } return false; } // tags added by exportHtmlAdditionalTagsWithData will be exported as with // data attributes function isSpanWithData(i) { const property = props[i]; return _.isArray(property); } function emitOpenTag(i) { openTags.unshift(i); const spanClass = getSpanClassFor(i); if (spanClass) { assem.append(''); } else { assem.append('<'); assem.append(tags[i]); assem.append('>'); } } // this closes an open tag and removes its reference from openTags function emitCloseTag(i) { openTags.shift(); const spanClass = getSpanClassFor(i); const spanWithData = isSpanWithData(i); if (spanClass || spanWithData) { assem.append(''); } else { assem.append(''); } } const urls = padutils.findURLs(text); let idx = 0; function processNextChars(numChars) { if (numChars <= 0) { return; } const iter = Changeset.opIterator(Changeset.subattribution(attribs, idx, idx + numChars)); idx += numChars; // this iterates over every op string and decides which tags to open or to close // based on the attribs used while (iter.hasNext()) { const o = iter.next(); var usedAttribs = []; // mark all attribs as used Changeset.eachAttribNumber(o.attribs, (a) => { if (a in anumMap) { usedAttribs.push(anumMap[a]); // i = 0 => bold, etc. } }); let outermostTag = -1; // find the outer most open tag that is no longer used for (var i = openTags.length - 1; i >= 0; i--) { if (usedAttribs.indexOf(openTags[i]) === -1) { outermostTag = i; break; } } // close all tags upto the outer most if (outermostTag !== -1) { while (outermostTag >= 0) { emitCloseTag(openTags[0]); outermostTag--; } } // open all tags that are used but not open for (i = 0; i < usedAttribs.length; i++) { if (openTags.indexOf(usedAttribs[i]) === -1) { emitOpenTag(usedAttribs[i]); } } let chars = o.chars; if (o.lines) { chars--; // exclude newline at end of line, if present } let s = taker.take(chars); // removes the characters with the code 12. Don't know where they come // from but they break the abiword parser and are completly useless s = s.replace(String.fromCharCode(12), ''); assem.append(_encodeWhitespace(Security.escapeHTML(s))); } // end iteration over spans in line // close all the tags that are open after the last op while (openTags.length > 0) { emitCloseTag(openTags[0]); } } // end processNextChars if (urls) { urls.forEach((urlData) => { const startIndex = urlData[0]; const url = urlData[1]; const urlLength = url.length; processNextChars(startIndex - idx); // Using rel="noreferrer" stops leaking the URL/location of the exported HTML when clicking links in the document. // Not all browsers understand this attribute, but it's part of the HTML5 standard. // https://html.spec.whatwg.org/multipage/links.html#link-type-noreferrer // Additionally, we do rel="noopener" to ensure a higher level of referrer security. // https://html.spec.whatwg.org/multipage/links.html#link-type-noopener // https://mathiasbynens.github.io/rel-noopener/ // https://github.com/ether/etherpad-lite/pull/3636 assem.append(``); processNextChars(urlLength); assem.append(''); }); } processNextChars(text.length - idx); return _processSpaces(assem.toString()); } // end getLineHTML const pieces = [css]; // Need to deal with constraints imposed on HTML lists; can // only gain one level of nesting at once, can't change type // mid-list, etc. // People might use weird indenting, e.g. skip a level, // so we want to do something reasonable there. We also // want to deal gracefully with blank lines. // => keeps track of the parents level of indentation let openLists = []; for (let i = 0; i < textLines.length; i++) { var context; var line = _analyzeLine(textLines[i], attribLines[i], apool); const lineContent = getLineHTML(line.text, line.aline); if (line.listLevel)// If we are inside a list { context = { line, lineContent, apool, attribLine: attribLines[i], text: textLines[i], padId: pad.id, }; let prevLine = null; let nextLine = null; if (i > 0) { prevLine = _analyzeLine(textLines[i - 1], attribLines[i - 1], apool); } if (i < textLines.length) { nextLine = _analyzeLine(textLines[i + 1], attribLines[i + 1], apool); } await hooks.aCallAll('getLineHTMLForExport', context); // To create list parent elements if ((!prevLine || prevLine.listLevel !== line.listLevel) || (prevLine && line.listTypeName !== prevLine.listTypeName)) { const exists = _.find(openLists, (item) => (item.level === line.listLevel && item.type === line.listTypeName)); if (!exists) { let prevLevel = 0; if (prevLine && prevLine.listLevel) { prevLevel = prevLine.listLevel; } if (prevLine && line.listTypeName !== prevLine.listTypeName) { prevLevel = 0; } for (var diff = prevLevel; diff < line.listLevel; diff++) { openLists.push({level: diff, type: line.listTypeName}); const prevPiece = pieces[pieces.length - 1]; if (prevPiece.indexOf('') === 0) { /* uncommenting this breaks nested ols.. if the previous item is NOT a ul, NOT an ol OR closing li then close the list so we consider this HTML, I inserted ** where it throws a problem in Example Wrong..
  1. one
    1. 1.1
      1. 1.1.1
  2. two
Note that closing the li then re-opening for another li item here is wrong. The correct markup is
  1. one
    1. 1.1
      1. 1.1.1
  2. two
Exmaple Right:
  1. one
    1. 1.1
      1. 1.1.1
  2. two
Example Wrong:
  1. one
  2. ****
    1. 1.1
    2. ****
      1. 1.1.1
  3. two
So it's firing wrong where the current piece is an li and the previous piece is an ol and next piece is an ol So to remedy this we can say if next piece is NOT an OL or UL. // pieces.push(""); */ if ((nextLine.listTypeName === 'number') && (nextLine.text === '')) { // is the listTypeName check needed here? null text might be completely fine! // TODO Check against Uls // don't do anything because the next item is a nested ol openener so we need to keep the li open } else { pieces.push('
  • '); } } if (line.listTypeName === 'number') { // We introduce line.start here, this is useful for continuing Ordered list line numbers // in case you have a bullet in a list IE you Want // 1. hello // * foo // 2. world // Without this line.start logic it would be // 1. hello * foo 1. world because the bullet would kill the OL // TODO: This logic could also be used to continue OL with indented content // but that's a job for another day.... if (line.start) { pieces.push(`
      `); } else { pieces.push(`
        `); } } else { pieces.push(`
          `); } } } } // if we're going up a level we shouldn't be adding.. if (context.lineContent) { pieces.push('
        • ', context.lineContent); } // To close list elements if (nextLine && nextLine.listLevel === line.listLevel && line.listTypeName === nextLine.listTypeName) { if (context.lineContent) { if ((nextLine.listTypeName === 'number') && (nextLine.text === '')) { // is the listTypeName check needed here? null text might be completely fine! // TODO Check against Uls // don't do anything because the next item is a nested ol openener so we need to keep the li open } else { pieces.push('
        • '); } } } if ((!nextLine || !nextLine.listLevel || nextLine.listLevel < line.listLevel) || (nextLine && line.listTypeName !== nextLine.listTypeName)) { let nextLevel = 0; if (nextLine && nextLine.listLevel) { nextLevel = nextLine.listLevel; } if (nextLine && line.listTypeName !== nextLine.listTypeName) { nextLevel = 0; } for (var diff = nextLevel; diff < line.listLevel; diff++) { openLists = openLists.filter((el) => el.level !== diff && el.type !== line.listTypeName); if (pieces[pieces.length - 1].indexOf(''); } if (line.listTypeName === 'number') { pieces.push('
      '); } else { pieces.push(''); } } } } else// outside any list, need to close line.listLevel of lists { context = { line, lineContent, apool, attribLine: attribLines[i], text: textLines[i], padId: pad.id, }; await hooks.aCallAll('getLineHTMLForExport', context); pieces.push(context.lineContent, '
      '); } } return pieces.join(''); } exports.getPadHTMLDocument = async function (padId, revNum) { const pad = await padManager.getPad(padId); // Include some Styles into the Head for Export let stylesForExportCSS = ''; const stylesForExport = await hooks.aCallAll('stylesForExport', padId); stylesForExport.forEach((css) => { stylesForExportCSS += css; }); let html = await getPadHTML(pad, revNum); for (const hookHtml of await hooks.aCallAll('exportHTMLAdditionalContent', {padId})) { html += hookHtml; } return eejs.require('ep_etherpad-lite/templates/export_html.html', { body: html, padId: Security.escapeHTML(padId), extraCSS: stylesForExportCSS, }); }; // copied from ACE function _processSpaces(s) { const doesWrap = true; if (s.indexOf('<') < 0 && !doesWrap) { // short-cut return s.replace(/ /g, ' '); } const parts = []; s.replace(/<[^>]*>?| |[^ <]+/g, (m) => { parts.push(m); }); if (doesWrap) { let endOfLine = true; let beforeSpace = false; // last space in a run is normal, others are nbsp, // end of line is nbsp for (var i = parts.length - 1; i >= 0; i--) { var p = parts[i]; if (p == ' ') { if (endOfLine || beforeSpace) parts[i] = ' '; endOfLine = false; beforeSpace = true; } else if (p.charAt(0) != '<') { endOfLine = false; beforeSpace = false; } } // beginning of line is nbsp for (i = 0; i < parts.length; i++) { p = parts[i]; if (p == ' ') { parts[i] = ' '; break; } else if (p.charAt(0) != '<') { break; } } } else { for (i = 0; i < parts.length; i++) { p = parts[i]; if (p == ' ') { parts[i] = ' '; } } } return parts.join(''); }