You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

391 lines
12 KiB

3 years ago
  // Reference point for the elapsed-time prefix printed by log().
  const startTime = Date.now();
  const fs = require('fs');
  const ueberDB = require('../src/node_modules/ueberdb2');
  const mysql = require('../src/node_modules/ueberdb2/node_modules/mysql');
  const async = require('../src/node_modules/async');
  const Changeset = require('ep_etherpad-lite/static/js/Changeset');
  const randomString = require('ep_etherpad-lite/static/js/pad_utils').randomString;
  const AttributePool = require('ep_etherpad-lite/static/js/AttributePool');

  // Command line arguments: <settings file> <sql output file>
  const [, , settingsFile, sqlOutputFile] = process.argv;

  // Both arguments are mandatory; print the usage and give up otherwise.
  if (!(settingsFile && sqlOutputFile)) {
    console.error('Use: node convert.js $SETTINGSFILE $SQLOUTPUT');
    process.exit(1);
  }

  // The settings file is plain JSON.
  log('read settings file...');
  const settings = JSON.parse(fs.readFileSync(settingsFile, 'utf8'));
  log('done');

  // Start the SQL dump: create the key/value table if needed and open the
  // transaction that the final step of the script commits.
  log('open output file...');
  const sqlOutput = fs.openSync(sqlOutputFile, 'w');
  const sql = [
    'SET CHARACTER SET UTF8;\n',
    'CREATE TABLE IF NOT EXISTS `store` ( \n',
    '`key` VARCHAR( 100 ) NOT NULL , \n',
    '`value` LONGTEXT NOT NULL , \n',
    'PRIMARY KEY ( `key` ) \n',
    ') ENGINE = INNODB;\n',
    'START TRANSACTION;\n\n',
  ].join('');
  fs.writeSync(sqlOutput, sql);
  log('done');

  // Connection to the old Etherpad database, configured from the settings file.
  const dbSettings = settings.etherpadDB;
  const etherpadDB = mysql.createConnection({
    host: dbSettings.host,
    user: dbSettings.user,
    password: dbSettings.password,
    database: dbSettings.database,
    port: dbSettings.port,
  });

  // One timestamp shared by every author record written during this run.
  const timestamp = Date.now();

  // Rows of the `SELECT ID FROM PAD_META` query; filled by the first pipeline step.
  let padIDs;
  // Main pipeline: load all pad IDs, convert every pad, then write the group
  // records, commit the transaction in the SQL dump and exit.
  async.series([
    // Step 1: read the ID of every pad from the old database.
    function (callback) {
      log('get all padIds out of the database...');
      etherpadDB.query('SELECT ID FROM PAD_META', [], (err, _padIDs) => {
        padIDs = _padIDs;
        callback(err);
      });
    },
    // Step 2: convert all pads, at most 100 at a time.
    function (callback) {
      log('done');

      // With no pads nothing is ever pushed, so the queue would never drain
      // and this step would never finish; complete it right away instead.
      if (padIDs.length === 0) {
        callback();
        return;
      }

      const queue = async.queue((padId, taskDone) => {
        convertPad(padId, (err) => {
          // Nothing else observes the error handed to the queue, so report
          // the failed pad here instead of dropping it silently.
          if (err) {
            console.error(`Error while reading pad ${padId} from the old database, pad skipped`);
            console.error(err.stack ? err.stack : JSON.stringify(err));
          }
          incrementPadStats();
          taskDone(err);
        });
      }, 100);

      // The step is finished once the queue has worked off every pad.
      // NOTE(review): assigning `drain` is the style of older async releases;
      // newer ones expect `queue.drain(fn)` - confirm against the bundled version.
      queue.drain = callback;

      for (const row of padIDs) {
        queue.push(row.ID);
      }
    },
  ], (err) => {
    if (err) throw err;

    // Write one group record and one subdomain -> group mapping for every
    // pro account that was seen while converting pads.
    let sql = '';
    for (const proID in proID2groupID) {
      const groupID = proID2groupID[proID];
      const subdomain = proID2subdomain[proID];
      sql += `REPLACE INTO store VALUES (${etherpadDB.escape(`group:${groupID}`)}, ${etherpadDB.escape(JSON.stringify(groups[groupID]))});\n`;
      sql += `REPLACE INTO store VALUES (${etherpadDB.escape(`mapper2group:subdomain:${subdomain}`)}, ${etherpadDB.escape(groupID)});\n`;
    }

    // Close the transaction opened in the dump's header.
    sql += 'COMMIT;';

    // Flush the tail of the dump and finish.
    fs.writeSync(sqlOutput, sql, undefined, 'utf-8');
    fs.closeSync(sqlOutput);
    log('finished.');
    process.exit(0);
  });
  /**
   * Prints a message to stdout, prefixed with the number of seconds elapsed
   * since `startTime` and a tab character.
   *
   * @param {string} str - the message to print
   */
  function log(str) {
    const elapsedSeconds = (Date.now() - startTime) / 1000;
    console.log(`${elapsedSeconds}\t${str}`);
  }
  // Number of pads that have been handed back by convertPad() so far.
  let padsDone = 0;

  /**
   * Counts one more processed pad. On every 100th pad it logs the progress
   * (`done/total`) together with the average throughput since the script
   * started, rounded to whole pads per second.
   */
  function incrementPadStats() {
    padsDone += 1;

    // Only report on every 100th pad.
    if (padsDone % 100 !== 0) return;

    const elapsedSeconds = (Date.now() - startTime) / 1000;
    const padsPerSecond = Math.round(padsDone / elapsedSeconds);
    log(`${padsDone}/${padIDs.length}\t${padsPerSecond} pad/s`);
  }
  // Lookup tables shared by all pad conversions. They are only read and
  // written from asynchronous callbacks, i.e. after these lines have run,
  // so they can be block-scoped constants like the rest of the file's state.

  // pro account ID -> generated group ID (`g.<random>`)
  const proID2groupID = {};
  // pro account ID -> subdomain read from `pro_domains`
  const proID2subdomain = {};
  // generated group ID -> group record (`{pads: {...}}`) written at the end of the run
  const groups = {};
  /**
   * Runs one of the old database's paged lookups (`PAD_<stem>_TEXT`, selected
   * through `PAD_<stem>_META`) for a pad and unpacks every returned page into
   * `target` with parsePage().
   *
   * @param {string} padId - ID of the pad in the old database
   * @param {string} tableStem - middle part of the table names, e.g. `REVS`
   * @param {Array} target - array that is filled in place
   * @param {boolean} json - passed through to parsePage()
   * @param {Function} callback - called with the query or parse error, if any
   */
  function loadPadPages(padId, tableStem, target, json, callback) {
    const sql = `SELECT * FROM \`PAD_${tableStem}_TEXT\` WHERE NUMID = (SELECT \`NUMID\` FROM \`PAD_${tableStem}_META\` WHERE ID=?)`;
    etherpadDB.query(sql, [padId], (err, results) => {
      if (!err) {
        try {
          for (const page of results) {
            parsePage(target, page.PAGESTART, page.OFFSETS, page.DATA, json);
          }
        } catch (e) { err = e; }
      }
      callback(err);
    });
  }

  /**
   * Runs a query that selects a `JSON` column for a pad and passes the `x`
   * member of the first row's parsed JSON to the callback.
   *
   * @param {string} sql - query with a single `?` placeholder for the pad ID
   * @param {string} padId - ID of the pad in the old database
   * @param {Function} callback - called with `(err, value)`; a missing row or
   *     invalid JSON is reported as `err`
   */
  function loadPadJson(sql, padId, callback) {
    etherpadDB.query(sql, [padId], (err, results) => {
      let value;
      if (!err) {
        try {
          value = JSON.parse(results[0].JSON).x;
        } catch (e) { err = e; }
      }
      callback(err, value);
    });
  }

  /**
   * Converts a single pad: loads everything stored about it in the old
   * database, rewrites authors (and, for pro pads, the pad ID) and appends the
   * resulting `REPLACE INTO store` statements to the SQL output file.
   *
   * A pad whose data cannot be converted is logged and skipped without an
   * error; only failures while loading its data are passed to the callback.
   *
   * @param {string} padId - ID of the pad in the old database
   * @param {Function} callback - called with an error if loading failed
   */
  function convertPad(padId, callback) {
    const changesets = [];
    const changesetsMeta = [];
    const chatMessages = [];
    const authors = [];
    let apool;
    let subdomain;
    let padmeta;

    // Pro pad IDs have the form `<proID>$<padName>`.
    const isProPad = padId.includes('$');

    async.series([
      // Phase 1: fetch all needed values from the old database in parallel.
      (loaded) => {
        async.parallel([
          // the chat entries
          (done) => loadPadPages(padId, 'CHAT', chatMessages, true, done),
          // the pad revisions (raw changeset strings, not JSON)
          (done) => loadPadPages(padId, 'REVS', changesets, false, done),
          // the meta data of the pad revisions
          (done) => loadPadPages(padId, 'REVMETA', changesetsMeta, true, done),
          // the attribute pool of this pad
          (done) => loadPadJson('SELECT `JSON` FROM `PAD_APOOL` WHERE `ID` = ?', padId, (err, pool) => {
            apool = pool;
            done(err);
          }),
          // the author information
          (done) => loadPadPages(padId, 'AUTHORS', authors, true, done),
          // the pad information
          (done) => loadPadJson('SELECT JSON FROM `PAD_META` WHERE ID=?', padId, (err, meta) => {
            padmeta = meta;
            done(err);
          }),
          // the subdomain, only relevant for pro pads
          (done) => {
            if (!isProPad) {
              done();
              return;
            }
            const proID = padId.split('$')[0];
            const sql = 'SELECT subDomain FROM pro_domains WHERE ID = ?';
            etherpadDB.query(sql, [proID], (err, results) => {
              if (!err) {
                // A missing row must fail this pad like every other lookup
                // does, not throw inside the database driver's callback.
                try {
                  subdomain = results[0].subDomain;
                } catch (e) { err = e; }
              }
              done(err);
            });
          },
        ], loaded);
      },
      // Phase 2: build the new records and write them to the SQL file.
      (converted) => {
        // all values that should be written to the database, by key
        const values = {};
        let newPadId = padId;
        let groupID = null;

        // this is a pro pad, let's convert it to a group pad
        if (isProPad) {
          const padIdParts = padId.split('$');
          const proID = padIdParts[0];
          const padName = padIdParts[1];

          // this proID is not converted so far: create the group and its mappers
          if (proID2groupID[proID] == null) {
            const freshGroupID = `g.${randomString(16)}`;
            proID2groupID[proID] = freshGroupID;
            proID2subdomain[proID] = subdomain;
            groups[freshGroupID] = {pads: {}};
          }

          groupID = proID2groupID[proID];
          // rename the pad
          newPadId = `${groupID}$${padName}`;
        }

        try {
          const newAuthorIDs = {};
          const oldName2newName = {};

          // Replace the authors with generated authors: the original etherpad
          // stores authors per pad, the new (lite) etherpad uses global ones.
          for (const num in apool.numToAttrib) {
            const attribName = apool.numToAttrib[num][0];
            const oldAuthorName = apool.numToAttrib[num][1];

            // skip non authors and anonymous authors
            if (attribName !== 'author' || oldAuthorName == '') continue;

            // generate new author values
            const authorID = `a.${randomString(16)}`;
            const oldAuthor = authors[num];
            // Keep a stored colour even when it is 0; only a missing one falls
            // back to a random palette entry.
            // NOTE(review): nothing in this file defines
            // `exports.getColorPalette`, so the fallback throws and the pad is
            // skipped - confirm where the palette should come from.
            const authorColorID = oldAuthor.colorId != null
              ? oldAuthor.colorId
              : Math.floor(Math.random() * (exports.getColorPalette().length));
            const authorName = oldAuthor.name || null;

            // overwrite the authorID of the attribute pool
            apool.numToAttrib[num][1] = authorID;

            // write the author to the database
            values[`globalAuthor:${authorID}`] = {colorId: authorColorID, name: authorName, timestamp};

            // save in mappers
            newAuthorIDs[num] = authorID;
            oldName2newName[oldAuthorName] = authorID;
          }

          // save all revisions
          for (let rev = 0; rev < changesets.length; rev++) {
            values[`pad:${newPadId}:revs:${rev}`] = {
              changeset: changesets[rev],
              meta: {
                author: newAuthorIDs[changesetsMeta[rev].a],
                timestamp: changesetsMeta[rev].t,
                atext: changesetsMeta[rev].atext || undefined,
              },
            };
          }

          // save all chat messages
          for (let msg = 0; msg < chatMessages.length; msg++) {
            values[`pad:${newPadId}:chat:${msg}`] = {
              text: chatMessages[msg].lineText,
              userId: oldName2newName[chatMessages[msg].userId],
              time: chatMessages[msg].time,
            };
          }

          // Generate the latest atext: start from the last key revision at or
          // below the head and apply the remaining changesets on top of it.
          const fullAPool = (new AttributePool()).fromJsonable(apool);
          const keyRev = Math.floor(padmeta.head / padmeta.keyRevInterval) * padmeta.keyRevInterval;
          let atext = changesetsMeta[keyRev].atext;
          for (let curRev = keyRev + 1; curRev <= padmeta.head; curRev++) {
            atext = Changeset.applyToAText(changesets[curRev], atext, fullAPool);
          }

          values[`pad:${newPadId}`] = {
            atext,
            pool: apool,
            head: padmeta.head,
            chatHead: padmeta.numChatMessages,
          };
        } catch (e) {
          console.error(`Error while converting pad ${newPadId}, pad skipped`);
          console.error(e.stack ? e.stack : JSON.stringify(e));
          converted();
          return;
        }

        // Register the pad in its group only now that it converted cleanly, so
        // the group record never lists a pad that was skipped.
        if (groupID != null) {
          groups[groupID].pads[newPadId] = 1;
        }

        const sql = Object.keys(values)
            .map((key) => `REPLACE INTO store VALUES (${etherpadDB.escape(key)}, ${etherpadDB.escape(JSON.stringify(values[key]))});\n`)
            .join('');
        fs.writeSync(sqlOutput, sql, undefined, 'utf-8');
        converted();
      },
    ], callback);
  }
  /**
   * Parses a page the way Etherpad stores them in its database: `offsets` is a
   * comma separated list with the length of each unit, `data` holds all units
   * directly behind each other.
   *
   * Unit number `i` of the page is stored at `array[pageStart + i]`. Empty
   * entries in `offsets` are skipped; they still count as a position but
   * consume no data.
   *
   * @param {Array} array - target array, filled in place
   * @param {number} pageStart - index in `array` of the page's first unit
   * @param {string} offsets - comma separated unit lengths
   * @param {string} data - the concatenated units
   * @param {boolean} json - when true every unit is run through JSON.parse
   * @throws {Error} if an offset is not a non-negative integer
   * @throws {SyntaxError} if `json` is set and a unit is not valid JSON
   */
  function parsePage(array, pageStart, offsets, data, json) {
    let start = 0;

    offsets.split(',').forEach((rawLength, i) => {
      // skip empty units
      if (rawLength === '') return;

      // A malformed length would silently shift every following unit, so
      // reject it instead of storing garbage.
      const unitLength = Number(rawLength);
      if (!Number.isInteger(unitLength) || unitLength < 0) {
        throw new Error(`Invalid unit length "${rawLength}" in page offsets`);
      }

      // cut the unit out of data and store it at its position
      const unit = data.slice(start, start + unitLength);
      array[pageStart + i] = json ? JSON.parse(unit) : unit;

      // the next unit begins right behind this one
      start += unitLength;
    });
  }
  324. }