parseUtils.js 9.5 KB

  1. "use strict";
  2. const fs = require('fs');
  3. const acorn = require('acorn');
  4. const walk = require('acorn-walk');
  5. module.exports = {
  6. parseBundle
  7. };
  /**
   * Reads a webpack bundle file and extracts the source text of every module found in it.
   *
   * The file is parsed with `acorn` and the AST is walked until the first node
   * that matches one of the module-container shapes handled below. Once module
   * locations are recorded, every visitor returns immediately.
   *
   * Errors thrown while reading or parsing the file are not caught here.
   *
   * @param {string} bundlePath Path passed to `fs.readFileSync`; the file is read as UTF-8.
   * @returns {{modules: Object, src: string, runtimeSrc: string}} `modules` maps each
   *   module id to the slice of the bundle text covering that module, `src` is the
   *   full bundle text, and `runtimeSrc` is the bundle text with the module slices removed.
   */
  function parseBundle(bundlePath) {
    const content = fs.readFileSync(bundlePath, 'utf8');
    const ast = acorn.parse(content, {
      sourceType: 'script',
      // I believe in a bright future of ECMAScript!
      // Actually, it's set to `2050` to support the latest ECMAScript version that currently exists.
      // Seems like `acorn` supports such weird option value.
      ecmaVersion: 2050
    });

    const walkState = {
      // Stays `null` until a modules container is found, then holds `{id: {start, end}}`.
      locations: null,
      // Nesting level of `ExpressionStatement` nodes currently being visited.
      expressionStatementDepth: 0
    };

    walk.recursive(ast, walkState, {
      ExpressionStatement(node, state, c) {
        if (state.locations) return;

        state.expressionStatementDepth++;

        if (
          // Webpack 5 stores modules in the top-level IIFE
          state.expressionStatementDepth === 1 &&
          ast.body.includes(node) &&
          isIIFE(node)
        ) {
          const fn = getIIFECallExpression(node);
          const { callee } = fn;
          // `isIIFE` accepts any call statement, so make sure the callee really is a
          // function with a block body before reading `params` and `body.body`.
          const isBlockBodiedFunction =
            (callee.type === 'FunctionExpression' || callee.type === 'ArrowFunctionExpression') &&
            callee.body.type === 'BlockStatement';

          if (
            isBlockBodiedFunction &&
            // It should contain neither arguments...
            fn.arguments.length === 0 &&
            // ...nor parameters
            callee.params.length === 0
          ) {
            // Modules are stored in the very first variable declaration as hash
            const firstVariableDeclaration = callee.body.body.find(
              (statement) => statement.type === 'VariableDeclaration'
            );

            if (firstVariableDeclaration) {
              for (const declaration of firstVariableDeclaration.declarations) {
                if (declaration.init) {
                  state.locations = getModulesLocations(declaration.init);

                  if (state.locations) {
                    break;
                  }
                }
              }
            }
          }
        }

        if (!state.locations) {
          c(node.expression, state);
        }

        state.expressionStatementDepth--;
      },

      AssignmentExpression(node, state) {
        if (state.locations) return;

        // Modules are stored in exports.modules:
        // exports.modules = {};
        const { left, right } = node;

        if (
          left &&
          left.object && left.object.name === 'exports' &&
          left.property && left.property.name === 'modules' &&
          isModulesHash(right)
        ) {
          state.locations = getModulesLocations(right);
        }
      },

      CallExpression(node, state, c) {
        if (state.locations) return;

        const args = node.arguments;

        // Main chunk with webpack loader.
        // Modules are stored in first argument:
        // (function (...) {...})(<modules>)
        if (
          node.callee.type === 'FunctionExpression' &&
          !node.callee.id &&
          args.length === 1 &&
          isSimpleModulesList(args[0])
        ) {
          state.locations = getModulesLocations(args[0]);
          return;
        }

        // Async Webpack < v4 chunk without webpack loader.
        // webpackJsonp([<chunks>], <modules>, ...)
        // As function name may be changed with `output.jsonpFunction` option we can't rely on its default name.
        if (
          node.callee.type === 'Identifier' &&
          mayBeAsyncChunkArguments(args) &&
          isModulesList(args[1])
        ) {
          state.locations = getModulesLocations(args[1]);
          return;
        }

        // Async Webpack v4 chunk without webpack loader.
        // (window.webpackJsonp=window.webpackJsonp||[]).push([[<chunks>], <modules>, ...]);
        // As function name may be changed with `output.jsonpFunction` option we can't rely on its default name.
        if (isAsyncChunkPushExpression(node)) {
          state.locations = getModulesLocations(args[0].elements[1]);
          return;
        }

        // Webpack v4 WebWorkerChunkTemplatePlugin
        // globalObject.chunkCallbackName([<chunks>],<modules>, ...);
        // Both globalObject and chunkCallbackName can be changed through the config, so we can't check them.
        if (isAsyncWebWorkerChunkExpression(node)) {
          state.locations = getModulesLocations(args[1]);
          return;
        }

        // Walking into arguments because some of plugins (e.g. `DedupePlugin`) or some Webpack
        // features (e.g. `umd` library output) can wrap modules list into additional IIFE.
        args.forEach((arg) => c(arg, state));
      }
    });

    const modules = {};

    if (walkState.locations) {
      for (const [id, loc] of Object.entries(walkState.locations)) {
        modules[id] = content.slice(loc.start, loc.end);
      }
    }

    return {
      modules,
      src: content,
      runtimeSrc: getBundleRuntime(content, walkState.locations)
    };
  }
  /**
   * Returns bundle source except modules.
   *
   * @param {string} content Full bundle text.
   * @param {?Object} modulesLocations Map of module id to `{start, end}` offsets into
   *   `content`; `null`/`undefined` is treated as "no modules".
   * @returns {string} `content` with every `[start, end)` range removed.
   */
  function getBundleRuntime(content, modulesLocations) {
    const sortedLocations = Object.values(modulesLocations || {})
      .sort((a, b) => a.start - b.start);

    let result = '';
    let lastIndex = 0;

    for (const { start, end } of sortedLocations) {
      // `slice` yields '' when `start` lies before `lastIndex` (overlapping ranges).
      result += content.slice(lastIndex, start);
      // Never move the cursor backwards, otherwise text inside an already removed
      // range would be emitted again when ranges overlap.
      lastIndex = Math.max(lastIndex, end);
    }

    return result + content.slice(lastIndex);
  }
  /**
   * Tells whether a statement is a call expression, optionally wrapped in a unary
   * operator (e.g. `!function () {}()`).
   *
   * Only the node types are inspected; the callee is not checked to be a function.
   *
   * @param {Object} node AST node.
   * @returns {boolean}
   */
  function isIIFE(node) {
    if (node.type !== 'ExpressionStatement') {
      return false;
    }

    const { expression } = node;

    return (
      expression.type === 'CallExpression' ||
      (expression.type === 'UnaryExpression' && expression.argument.type === 'CallExpression')
    );
  }
  /**
   * Returns the call expression of an expression statement, unwrapping a unary
   * operator when the expression is a `UnaryExpression`.
   *
   * @param {Object} node `ExpressionStatement` node.
   * @returns {Object} `node.expression.argument` for a unary expression, otherwise `node.expression`.
   */
  function getIIFECallExpression(node) {
    const { expression } = node;

    return expression.type === 'UnaryExpression' ? expression.argument : expression;
  }
  /**
   * Tells whether a node is any supported modules container: a simple list
   * (hash or array) or an optimized `Array(<minimum ID>).concat([...])` array.
   *
   * @param {Object} node AST node.
   * @returns {boolean}
   */
  function isModulesList(node) {
    if (isSimpleModulesList(node)) {
      return true;
    }

    // Modules are contained in expression `Array([minimum ID]).concat([<module>, <module>, ...])`
    return isOptimizedModulesArray(node);
  }
  /**
   * Tells whether a node is a modules hash or a modules array.
   *
   * @param {Object} node AST node.
   * @returns {boolean}
   */
  function isSimpleModulesList(node) {
    // Modules are contained in hash. Keys are module ids.
    if (isModulesHash(node)) {
      return true;
    }

    // Modules are contained in array. Indexes are module ids.
    return isModulesArray(node);
  }
  /**
   * Tells whether a node is an object literal whose every property value is a
   * module wrapper.
   *
   * An object literal without properties counts as a modules hash.
   *
   * @param {Object} node AST node.
   * @returns {boolean}
   */
  function isModulesHash(node) {
    return (
      node.type === 'ObjectExpression' &&
      node.properties.every(
        // Spread elements (`{...other}`) have no `value`; such an object is not a plain modules hash.
        (property) => property.value != null && isModuleWrapper(property.value)
      )
    );
  }
  /**
   * Tells whether a node is an array literal whose every present element is a
   * module wrapper.
   *
   * @param {Object} node AST node.
   * @returns {boolean}
   */
  function isModulesArray(node) {
    if (node.type !== 'ArrayExpression') {
      return false;
    }

    // Some of array items may be skipped because there is no module with such id
    return node.elements.every((elem) => !elem || isModuleWrapper(elem));
  }
  /**
   * Checks whether modules are contained in an `Array(<minimum ID>).concat([...modules])`
   * expression.
   *
   * The `<minimum ID>` + array indexes are module ids.
   *
   * @param {Object} node AST node.
   * @returns {boolean}
   */
  function isOptimizedModulesArray(node) {
    if (node.type !== 'CallExpression' || node.callee.type !== 'MemberExpression') {
      return false;
    }

    const { object, property } = node.callee;

    return (
      // Make sure the object called is `Array(<some number>)`
      object.type === 'CallExpression' &&
      object.callee.type === 'Identifier' &&
      object.callee.name === 'Array' &&
      object.arguments.length === 1 &&
      isNumericId(object.arguments[0]) &&
      // Make sure the property X called for `Array(<some number>).X` is `concat`
      property.type === 'Identifier' &&
      property.name === 'concat' &&
      // Make sure exactly one array is passed in to `concat`
      node.arguments.length === 1 &&
      isModulesArray(node.arguments[0])
    );
  }
  /**
   * Tells whether a node can stand in a modules container: an anonymous function,
   * a module id, or an array of shape `[<module_id>, ...args]`.
   *
   * @param {Object} node AST node.
   * @returns {boolean}
   */
  function isModuleWrapper(node) {
    // It's an anonymous function expression that wraps module
    const isAnonymousFunction =
      (node.type === 'FunctionExpression' || node.type === 'ArrowFunctionExpression') && !node.id;

    if (isAnonymousFunction) {
      return true;
    }

    // If `DedupePlugin` is used it can be an ID of duplicated module...
    if (isModuleId(node)) {
      return true;
    }

    // or an array of shape [<module_id>, ...args]
    return (
      node.type === 'ArrayExpression' &&
      node.elements.length > 1 &&
      // A hole in the first slot (`[, x]`) is represented by `null` and is not a module id.
      node.elements[0] !== null &&
      isModuleId(node.elements[0])
    );
  }
  /**
   * Tells whether a node is a literal usable as a module id: a string or a
   * non-negative integer.
   *
   * @param {?Object} node AST node; `null` (an array hole) yields `false`.
   * @returns {boolean}
   */
  function isModuleId(node) {
    return (
      node != null &&
      node.type === 'Literal' &&
      (typeof node.value === 'string' || isNumericId(node))
    );
  }
  /**
   * Tells whether a node is a literal holding a non-negative integer.
   *
   * @param {Object} node AST node.
   * @returns {boolean}
   */
  function isNumericId(node) {
    if (node.type !== 'Literal') {
      return false;
    }

    return Number.isInteger(node.value) && node.value >= 0;
  }
  /**
   * Tells whether a node is an array literal made only of module-id-like literals.
   *
   * Array of numeric or string ids. Chunk IDs are strings when NamedChunksPlugin is used.
   *
   * @param {Object} node AST node.
   * @returns {boolean}
   */
  function isChunkIds(node) {
    if (node.type !== 'ArrayExpression') {
      return false;
    }

    // Holes (`[, 1]`) are represented by `null`; an array with holes is not a list of ids.
    return node.elements.every((elem) => elem !== null && isModuleId(elem));
  }
  /**
   * Tells whether a call has the shape `(<assignment>).push([[<chunks>], <modules>, ...])`.
   *
   * @param {Object} node `CallExpression` node.
   * @returns {boolean}
   */
  function isAsyncChunkPushExpression(node) {
    const { callee, arguments: args } = node;

    const isPushOnAssignment =
      callee.type === 'MemberExpression' &&
      callee.property.name === 'push' &&
      callee.object.type === 'AssignmentExpression';

    if (!isPushOnAssignment || args.length !== 1 || args[0].type !== 'ArrayExpression') {
      return false;
    }

    const { elements } = args[0];

    // Missing entries and holes (`null`) cannot be chunk ids or a modules list.
    if (!elements[0] || !elements[1]) {
      return false;
    }

    return mayBeAsyncChunkArguments(elements) && isModulesList(elements[1]);
  }
  /**
   * Tells whether an argument list could belong to an async chunk call: at least
   * two entries, the first being an array of chunk ids.
   *
   * @param {Array<?Object>} args AST nodes; may contain `null` when taken from an
   *   array literal with holes.
   * @returns {boolean}
   */
  function mayBeAsyncChunkArguments(args) {
    if (args.length < 2) {
      return false;
    }

    const [chunkIds] = args;

    return chunkIds != null && isChunkIds(chunkIds);
  }
  /**
   * Tells whether a node has the shape `<object>.<callback>([<chunks>], <modules>)`:
   * a member call with exactly two arguments, chunk ids followed by a modules list.
   *
   * @param {Object} node AST node.
   * @returns {boolean}
   */
  function isAsyncWebWorkerChunkExpression(node) {
    const { callee, type, arguments: args } = node;

    if (type !== 'CallExpression' || callee.type !== 'MemberExpression') {
      return false;
    }

    return args.length === 2 && isChunkIds(args[0]) && isModulesList(args[1]);
  }
  /**
   * Builds a map of module id to `{start, end}` source offsets from a modules container.
   *
   * Supported containers:
   * - object literal: the property key (identifier name or literal value) is the module id;
   * - array literal: the element index is the module id, holes are skipped;
   * - `Array(<minimum ID>).concat([...])`: module id is `<minimum ID>` + element index.
   *
   * @param {Object} node AST node.
   * @returns {Object} Map of module id to location; empty for any other node.
   */
  function getModulesLocations(node) {
    if (node.type === 'ObjectExpression') {
      // Modules hash
      const locations = {};

      for (const moduleNode of node.properties) {
        // Spread elements (`{...other}`) carry no key, so there is no module id to record.
        if (!moduleNode.key) continue;

        const moduleId = moduleNode.key.name || moduleNode.key.value;

        locations[moduleId] = getModuleLocation(moduleNode.value);
      }

      return locations;
    }

    // Not every call expression is `Array(n).concat([...])`; verify the shape before
    // reading `callee.object.arguments` below.
    const isOptimizedArray = node.type === 'CallExpression' && isOptimizedModulesArray(node);

    if (node.type === 'ArrayExpression' || isOptimizedArray) {
      // Modules array or optimized array
      const minId = isOptimizedArray
        // Get the [minId] value from the Array() call first argument literal value
        ? node.callee.object.arguments[0].value
        // `0` for simple array
        : 0;
      const modulesNodes = isOptimizedArray
        // The modules reside in the `concat()` function call arguments
        ? node.arguments[0].elements
        : node.elements;
      const locations = {};

      modulesNodes.forEach((moduleNode, i) => {
        if (moduleNode) {
          locations[i + minId] = getModuleLocation(moduleNode);
        }
      });

      return locations;
    }

    return {};
  }
  /**
   * Extracts the source offsets of a node.
   *
   * @param {Object} node AST node carrying `start` and `end`.
   * @returns {{start: *, end: *}} New object holding only the node's `start` and `end`.
   */
  function getModuleLocation(node) {
    const { start, end } = node;

    return { start, end };
  }