Source: pageTransition.js

  1. /**
  2. * This module enables observing webpage transitions, synthesizing a range of
  3. * transition data that may be valuable for browser-based studies. See the
  4. * `onPageTransitionData` event for details.
  5. *
  6. * ## Types of Page Transition Data
  7. * This module supports several types of page transition data. Some types are
  8. * supported and recommended, because the data is consistently available, has
  9. * consistent meaning, and reflects discrete categories of user interactions.
  10. * Other types of transition data are supported because they appear in prior
  11. * academic literature, but we do not recommend them because of significant
  12. * limitations.
  13. * * Supported and Recommended Types of Page Transition Data
  14. * * WebExtensions Transitions - This module reports the same webpage
  15. * transition data provided by the WebExtensions `webNavigation` API. There
  16. * are two types of transition data: `TransitionType` (e.g., "link" or
  17. * "typed") and `TransitionQualifier` (e.g., "from_address_bar" or
  18. * "forward_back"). Note that Firefox's support for these values is mostly
  19. * but not entirely complete and defaults to a "link" transition type. The
  20. * MDN documentation for Firefox's implementation is also currently out of
  21. * date, see: https://github.com/mdn/browser-compat-data/issues/9019. We
  22. * recommend checking click transition data to confirm whether the user
  23. * clicked on a link.
  24. * * Tab-based Transitions - This module reports the webpage that was
  25. * previously loaded in a new webpage's tab. If the webpage is loading in a
  26. * newly created tab, this module reports the webpage that was open in
  27. * the opener tab. We recommend using tab-based transition data when the user
  28. * has clicked a link (according to both WebExtensions and click data), when
  29. * the user has navigated with forward and back buttons, and when the page
  30. * has refreshed (due to user action or automatically). In these situations,
  31. * there is a clear causal relationship between the previous and current
  32. * pages. We do not otherwise recommend using tab-based transition data,
  33. * because the user might be reusing a tab for reasons unrelated to the page
  34. * loaded in the tab.
  35. * * Click Transitions - This module reports when a click on a webpage is
  36. * immediately followed by a new webpage loading in the same tab (or a
  37. * newly opened tab where that tab is the opener). This activity indicates
  38. * the user likely clicked a link, and it compensates for limitations in
  39. * how browsers detect link clicks for the `webNavigation` API.
  40. * * Supported But Not Recommended Types of Page Transition Data
  41. * * Referrers - This module reports the HTTP referrer for each new page. While
  42. * referrers have long been a method for associating webpage loads with
  43. * prior pages, they are not consistently available (webpages and browsers
  44. * are increasingly limiting when referrers are sent), do not have consistent
  45. * content (similarly, webpages and browsers are increasingly limiting
  46. * referrers to just origins), and do not have consistent meaning (the rules
  47. * for setting referrers are notoriously complex and can have nonintuitive
  48. * semantics). Be especially careful with referrers for webpage loads via
  49. * the History API---because there is no new document-level HTTP request, the
  50. * referrer will not change when the URL changes.
  51. * * Time-based Transitions - This module reports the most recent webpage that
  52. * loaded in any tab. We do not recommend relying on this data, because a
  53. * chronological ordering of webpage loads may have no relation to user
  54. * activity or perception (e.g., a webpage might automatically reload in the
  55. * background before a user navigates to a new page).
  56. *
  57. * ## Page Transition Data Sources
  58. * This module builds on the page tracking provided by the `pageManager`
  59. * module and uses browser events, DOM events, and a set of heuristics to
  60. * associate transition information with each page visit. The module relies on
  61. * the following sources of data about page transitions, in addition to the
  62. * page visit tracking, attention tracking, and URL normalization provided by
  63. * `pageManager`:
  64. * * Background Script Data Sources
  65. * * `webNavigation.onCommitted` - provides tab ID, url,
  66. * `webNavigation.TransitionType`, and `webNavigation.TransitionQualifier`
  67. * values when a new page is loading in a tab.
  68. * * `webNavigation.onDOMContentLoaded` - provides tab ID, url, and a
  69. * timestamp approximating when the `DOMContentLoaded` event fired on a
  70. * page.
  71. * * `webNavigation.onHistoryStateUpdated` - provides tab ID, url,
  72. * `webNavigation.TransitionType`, and `webNavigation.TransitionQualifier`
  73. * values when a new page loads in a tab via the History API.
  74. * * `webNavigation.onCreatedNavigationTarget` - provides tab ID, source
  75. * tab ID, and url when a page loads in a tab newly created by another
  76. * tab. Because of a regression, this event does not currently fire
  77. * in Firefox for a click on a link with the target="_blank" attribute.
  78. * * `tabs.onCreated` - provides tab ID and source tab ID when a page
  79. * loads in a tab newly created by another tab, except if the new
  80. * tab is in a different window.
  81. * * Content Script Data Sources
  82. * * The `click` event on the `document` element - detects possible link
  83. * clicks via the mouse (e.g., left click).
  84. * * The `contextmenu` event on the `document` element - detects possible
  85. * link clicks via the mouse (e.g., right click or control + click).
  86. * * The `keyup` event on the document element - detects possible link
  87. * clicks via the keyboard.
  88. *
  89. * ## Combining Data Sources into a Page Transition
  90. * Merging these data sources into a page transition event poses several
  91. * challenges.
  92. * * We have to sync background script `webNavigation` events with content
  93. * scripts. As with `pageManager`, we have to account for the possibility
  94. * of race conditions between the background script and content script
  95. * environments. We use the same general approach in this module as in
  96. * `pageManager`, converting background script events into messages posted
  97. * to content scripts. We have to be a bit more careful about race
  98. * conditions than in `pageManager`, though, because if a tab property
  99. * event handled in that module goes to the wrong content script the
  100. * consequences are minimal (because correct event data will quickly
  101. * arrive afterward). In this module, by contrast, an error could mean
  102. * incorrectly associating a pair of pages. We further account for the
  103. * possibility of race conditions by matching the `webNavigation` URL and
  104. * DOMContentLoaded timestamp with the content script's URL and
  105. * DOMContentLoaded timestamp.
  106. * * We have to sync background script `webNavigation` events for different
  107. * stages in the webpage loading lifecycle, because we want properties of
  108. * both `webNavigation.onCommitted` and `webNavigation.onDOMContentLoaded`:
  109. * the former has transition types and qualifiers, while the latter has a
  110. * timestamp that is comparable to an event in the content script and does
  111. * not have the risk of firing before the content script is ready to
  112. * receive messages. Unlike `webRequest` events, `webNavigation` events are
  113. * not associated with unique identifiers. We accomplish syncing across
  114. * events by assuming that when the `webNavigation.onDOMContentLoaded` event
  115. * fires for a tab, it is part of the same navigation lifecycle as the most
  116. * recent `webNavigation.onCommitted` event in the tab.
  117. * * We have to sync content script data for a page with content script
  118. * data for a prior page (either loaded in the same tab, loaded in an
  119. * opener tab, or loaded immediately before in time). We accomplish this for
  120. * ordinary page loads by maintaining a cache of page visit data in the
  121. * background script. We accomplish this for History API page loads
  122. * by passing information in the content script environment.
  123. * * We have to account for a regression in Firefox where
  124. * `webNavigation.onCreatedNavigationTarget` does not currently fire for
  125. * a click on a link with the target="_blank" attribute. We accomplish this
  126. * by using `tabs.onCreated` event data when
  127. * `webNavigation.onCreatedNavigationTarget` event data is not available.
  128. *
  129. * @see {@link https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions/API/webNavigation/onCommitted}
  130. * @see {@link https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions/API/webNavigation/TransitionType}
  131. * @see {@link https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions/API/webNavigation/TransitionQualifier}
  132. * @see {@link https://github.com/mdn/browser-compat-data/issues/9019}
  133. * @see {@link https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions/API/tabs/onCreated}
  134. * @module pageTransition
  135. */
  136. import * as events from "./events.js";
  137. import * as permissions from "./permissions.js";
  138. import * as messaging from "./messaging.js";
  139. import * as matching from "./matching.js";
  140. import * as timing from "./timing.js";
  141. import * as pageManager from "./pageManager.js";
  142. import pageTransitionEventContentScript from "include:./content-scripts/pageTransition.event.content.js";
  143. import pageTransitionClickContentScript from "include:./content-scripts/pageTransition.click.content.js";
  144. /**
  145. * A listener for the `onPageTransitionData` event.
  146. * @callback pageTransitionDataListener
  147. * @memberof module:pageTransition.onPageTransitionData
  148. * @param {Object} details - Additional information about the page transition data event.
  149. * @param {string} details.pageId - The ID for the page, unique across browsing sessions.
  150. * @param {string} details.url - The URL of the page, without any hash.
  151. * @param {string} details.referrer - The referrer URL for the page, or `""` if there is no referrer. Note that we
  152. * recommend against using referrers for analyzing page transitions.
  153. * @param {number} details.tabId - The ID for the tab containing the page, unique to the browsing session. Note that if
  154. * you send a message to the content script in the tab, there is a possible race condition where the page in
  155. * the tab changes before your message arrives. You should specify a page ID (e.g., `pageId`) in your message to the
  156. * content script, and the content script should check that page ID against its current page ID to ensure that the
  157. * message was received by the intended page.
  158. * @param {boolean} details.isHistoryChange - Whether the page transition was caused by a URL change via the History API.
  159. * @param {boolean} details.isOpenedTab - Whether the page is loading in a tab that was newly opened from another tab.
  160. * @param {number} details.openerTabId - If the page is loading in a tab that was newly opened from another tab
  161. * (i.e., `isOpenedTab` is `true`), the tab ID of the opener tab. Otherwise, `tabs.TAB_ID_NONE`. Note that if
  162. * you send a message to the content script in the tab, there is a possible race condition where the page in
  163. * the tab changes before your message arrives. You should specify a page ID (e.g., `tabSourcePageId`) in your
  164. * message to the content script, and the content script should check that page ID against its current page ID to
  165. * ensure that the message was received by the intended page.
  166. * @param {string} details.transitionType - The transition type, from `webNavigation.onCommitted` or
  167. * `webNavigation.onHistoryStateUpdated`.
  168. * @param {string[]} details.transitionQualifiers - The transition qualifiers, from `webNavigation.onCommitted` or
  169. * `webNavigation.onHistoryStateUpdated`.
  170. * @param {string} details.tabSourcePageId - The ID for the most recent page in the same tab. If the page is opening
  171. * in a new tab, then the ID of the most recent page in the opener tab. The value is `""` if there is no such page.
  172. * @param {string} details.tabSourceUrl - The URL, without any hash, for the most recent page in the same tab. If the page
  173. * is opening in a new tab, then the URL of the most recent page in the opener tab. The value is `""` if there is no
  174. * such page.
  175. * @param {boolean} details.tabSourceClick - Whether the user recently clicked or pressed enter/return on the most recent
  176. * page in the same tab. If the page is loading in a tab that was newly opened by another tab, then whether the user
  177. * recently clicked or pressed enter/return on the most recent page in the opener tab. The value is `false` if there
  178. * is no such page.
  179. * @param {string} details.timeSourcePageId - The ID for the most recent page that loaded into any tab. If this is the
  180. * first page visit after the extension starts, the value is "". Note that we recommend against using time-based
  181. * page transition data.
  182. * @param {string} details.timeSourceUrl - The URL for the most recent page that loaded into any tab. If this is the
  183. * first page visit after the extension starts, the value is "". Note that we recommend against using time-based
  184. * page transition data.
  185. */
  186. /**
  187. * @typedef {Object} PageTransitionDataListenerRecord
  188. * @property {matching.MatchPatternSet} matchPatternSet - Match patterns for pages where the listener should be
  189. * notified about transition data.
  190. * @property {boolean} privateWindows - Whether to notify the listener about page transitions in
  191. * private windows and whether to consider pages loaded in private windows when generating
  192. * time-based transition information.
  193. * @property {browser.contentScripts.RegisteredContentScript} contentScript - The content
  194. * script associated with the listener.
  195. * @private
  196. */
  /**
   * Registry of `onPageTransitionData` listeners. Each key is a listener function and
   * each value is the record (match pattern set, private window setting, and registered
   * content script) stored for that listener when it was added.
   * @constant {Map<pageTransitionDataListener, PageTransitionDataListenerRecord>}
   * @private
   */
  const pageTransitionDataListeners = new Map();
  203. /**
  204. * Add a listener for the `onPageTransitionData` event.
  205. * @function addListener
  206. * @memberof module:pageTransition.onPageTransitionData
  207. * @param {pageTransitionDataListener} listener - The listener to add.
  208. * @param {Object} options - Options for the listener.
  209. * @param {string[]} options.matchPatterns - Match patterns for pages where the listener should be notified about
  210. * transition data.
  211. * @param {boolean} [options.privateWindows=false] - Whether to notify the listener about page transitions in
  212. * private windows and whether to consider pages loaded in private windows when generating time-based
  213. * transition information.
  214. */
  215. /**
  216. * Remove a listener for the `onPageTransitionData` event.
  217. * @function removeListener
  218. * @memberof module:pageTransition.onPageTransitionData
  219. * @param {pageTransitionDataListener} listener - The listener to remove.
  220. */
  221. /**
  222. * Whether a specified listener has been added for the `onPageTransitionData` event.
  223. * @function hasListener
  224. * @memberof module:pageTransition.onPageTransitionData
  225. * @param {pageTransitionDataListener} listener - The listener to check.
  226. * @returns {boolean} Whether the listener has been added for the event.
  227. */
  228. /**
  229. * Whether the `onPageTransitionData` event has any listeners.
  230. * @function hasAnyListeners
  231. * @memberof module:pageTransition.onPageTransitionData
  232. * @returns {boolean} Whether the event has any listeners.
  233. */
  /**
   * An event that fires when data about a page transition is available. The event fires
   * after the `pageManager.onPageVisitStart` event: once DOM content has loaded for an
   * ordinary page load, or just after the URL changes for a History API page load.
   * @namespace
   */
  export const onPageTransitionData = events.createEvent({
      name: "webScience.pageTransition.onPageTransitionData",
      addListenerCallback: addListener,
      removeListenerCallback: removeListener,
      // Always false. NOTE(review): presumably this opts out of the generic notification
      // path because this module invokes matching listeners itself when a content script
      // update message arrives -- confirm against events.js.
      notifyListenersCallback: () => false
  });
  /**
   * A callback function for adding a page transition data listener. The options for the
   * listener must be kept in sync with the public `onPageTransitionData.addListener` type.
   *
   * The module is initialized on first use, the listener's match patterns are compiled,
   * and the event content script is registered for those patterns. If the listener was
   * already added, the content script from its previous record is unregistered before the
   * record is replaced, so the earlier registration does not leak.
   * @param {pageTransitionDataListener} listener - The listener being added.
   * @param {Object} options - Options for the listener.
   * @param {string[]} options.matchPatterns - Match patterns for pages where the listener
   * should be notified about transition data.
   * @param {boolean} [options.privateWindows=false] - Whether to notify the listener about
   * page transitions in private windows and whether to consider pages loaded in private
   * windows when generating time-based transition information.
   * @returns {Promise<void>} Resolves once the listener record has been stored.
   * @private
   */
  async function addListener(listener, {
      matchPatterns,
      privateWindows = false
  }) {
      await initialize();

      // Compile the listener's match pattern set before registering the content script
      const matchPatternSet = matching.createMatchPatternSet(matchPatterns);

      // Register the event content script with the listener's match patterns
      const contentScript = await browser.contentScripts.register({
          matches: matchPatterns,
          js: [{
              file: pageTransitionEventContentScript
          }],
          runAt: "document_start"
      });

      // Look up any earlier record only after the awaits above, so that the record
      // being replaced is the one currently in the map. As in removeListener, the
      // result of unregister() is not awaited.
      const previousRecord = pageTransitionDataListeners.get(listener);
      if(previousRecord !== undefined) {
          previousRecord.contentScript.unregister();
      }

      // Store a record for the listener
      pageTransitionDataListeners.set(listener, {
          matchPatternSet,
          privateWindows,
          contentScript
      });
  }
  /**
   * A callback function for removing a page transition data listener. Unregisters the
   * content script that was registered for the listener and removes the listener's
   * record. Does nothing if the listener has no record.
   * @param {pageTransitionDataListener} listener - The listener that is being removed.
   * @private
   */
  function removeListener(listener) {
      const listenerRecord = pageTransitionDataListeners.get(listener);
      if(listenerRecord === undefined) {
          return;
      }
      listenerRecord.contentScript.unregister();
      // The map is keyed by listener, not by record; deleting by record left the entry
      // in place, so a removed listener would still be visited when notifying listeners.
      pageTransitionDataListeners.delete(listener);
  }
  /**
   * Whether the module has been initialized.
   * @type {boolean}
   * @private
   */
  let initialized = false;

  /**
   * Initialize the module, registering event handlers and message schemas. Only the
   * first call does any work; later calls return immediately.
   *
   * NOTE(review): the `initialized` flag is set before the asynchronous setup below has
   * finished, so a second call made while the first is still awaiting returns before
   * initialization is complete.
   * @returns {Promise<void>} Resolves when this call has finished (or skipped) setup.
   * @private
   */
  async function initialize() {
      if(initialized) {
          return;
      }
      initialized = true;

      permissions.check({
          module: "webScience.pageTransition",
          requiredPermissions: [ "webNavigation" ],
          suggestedOrigins: [ "<all_urls>" ]
      });

      await pageManager.initialize();

      // Register the click content script for all URLs permitted by the extension manifest
      await browser.contentScripts.register({
          matches: permissions.getManifestOriginMatchPatterns(),
          js: [{
              file: pageTransitionClickContentScript
          }],
          runAt: "document_start"
      });

      // When pageManager.onPageVisitStart fires...
      pageManager.onPageVisitStart.addListener(({ pageId, url, pageVisitStartTime, privateWindow, tabId }) => {
          // Add the page visit's page ID, URL, start time, and private window status to the time-based transition cache
          pageVisitTimeCache[pageId] = { url, pageVisitStartTime, privateWindow };

          // Add the page visit's tab ID, page ID, URL, and start time to the tab-based transition cache
          let cachedPageVisitsForTab = pageVisitTabCache.get(tabId);
          if(cachedPageVisitsForTab === undefined) {
              cachedPageVisitsForTab = { };
              pageVisitTabCache.set(tabId, cachedPageVisitsForTab);
          }
          cachedPageVisitsForTab[pageId] = { url, pageVisitStartTime, clickTimeStamps: [ ] };

          // We can't remove stale pages from the time-based and tab-based caches here, because otherwise we can
          // have a race condition where the most recent page in a cache (from pageManager.onPageVisitStart)
          // is the same page that's about to receive a message from the background script (because of
          // webNavigation.onDOMContentLoaded). In that situation, we might evict an older page from the cache
          // that was the correct page for time-based or tab-based transition information.
      });

      // When webNavigation.onCommitted fires, store the details in the per-tab onCommitted details cache
      browser.webNavigation.onCommitted.addListener(details => {
          // Ignore subframe navigation
          if(details.frameId !== 0) {
              return;
          }
          webNavigationOnCommittedCache.set(details.tabId, details);
      }, {
          url: [ { schemes: [ "http", "https" ] } ]
      });

      // When webNavigation.onDOMContentLoaded fires, pull the webNavigation.onCommitted
      // details from the per-tab cache and notify the content script
      browser.webNavigation.onDOMContentLoaded.addListener(details => {
          // Ignore subframe navigation
          if(details.frameId !== 0) {
              return;
          }

          // Get the cached webNavigation.onCommitted details and expire the cache
          const webNavigationOnCommittedDetails = webNavigationOnCommittedCache.get(details.tabId);
          if(webNavigationOnCommittedDetails === undefined) {
              return;
          }
          webNavigationOnCommittedCache.delete(details.tabId);

          // Confirm that the webNavigation.onCommitted URL matches the webNavigation.onDOMContentLoaded URL
          if(details.url !== webNavigationOnCommittedDetails.url) {
              return;
          }

          // Notify the content script
          sendUpdateToContentScript({
              tabId: details.tabId,
              url: details.url,
              timeStamp: timing.fromSystemClock(details.timeStamp),
              webNavigationTimeStamp: details.timeStamp,
              transitionType: webNavigationOnCommittedDetails.transitionType,
              transitionQualifiers: webNavigationOnCommittedDetails.transitionQualifiers,
              isHistoryChange: false
          });
      }, {
          url: [ { schemes: [ "http", "https" ] } ]
      });

      // When webNavigation.onHistoryStateUpdated fires, notify the content script
      browser.webNavigation.onHistoryStateUpdated.addListener(details => {
          // Ignore subframe navigation
          if(details.frameId !== 0) {
              return;
          }

          // Notify the content script
          sendUpdateToContentScript({
              tabId: details.tabId,
              url: details.url,
              timeStamp: timing.fromSystemClock(details.timeStamp),
              webNavigationTimeStamp: details.timeStamp,
              transitionType: details.transitionType,
              transitionQualifiers: details.transitionQualifiers,
              isHistoryChange: true
          });
      }, {
          url: [ { schemes: [ "http", "https" ] } ]
      });

      // Register the message schemas for background script updates
      messaging.registerSchema("webScience.pageTransition.backgroundScriptEventUpdate", {
          url: "string",
          timeStamp: "number",
          webNavigationTimeStamp: "number",
          transitionType: "string",
          transitionQualifiers: "object",
          pageVisitTimeCache: "object",
          cachedPageVisitsForTab: "object",
          isHistoryChange: "boolean",
          isOpenedTab: "boolean",
          openerTabId: "number",
          tabOpeningTimeStamp: "number"
      });

      // When webNavigation.onCreatedNavigationTarget fires, update the opener tab cache.
      // This event fires for all opened tabs regardless of window, except for a regression
      // since Firefox 65 where the event does not fire for tabs opened by clicking a link
      // with a target="_blank" attribute. See https://github.com/mdn/content/issues/4507.
      // We observe those tab openings with tabs.onCreated, since the tabs are always in the
      // same window. We do not use the URL from webNavigation.onCreatedNavigationTarget,
      // because an HTTP redirect might change the URL before webNavigation.onCommitted and
      // webNavigation.onDOMContentLoaded fire.
      browser.webNavigation.onCreatedNavigationTarget.addListener(details => {
          openerTabCache.set(details.tabId, {
              openerTabId: details.sourceTabId,
              timeStamp: details.timeStamp
          });
      }, {
          url: [ { schemes: [ "http", "https" ] } ]
      });

      // When tabs.onCreated fires, update the opener tab cache. This event fires for all opened
      // tabs in the same window, but not opened tabs in a new window. We observe tabs that open
      // in new windows with webNavigation.onCreatedNavigationTarget.
      browser.tabs.onCreated.addListener(tab => {
          // Ignore non-content tabs
          if(!("id" in tab) || (tab.id === browser.tabs.TAB_ID_NONE)) {
              return;
          }
          // Ignore tabs without content opener tabs
          if(!("openerTabId" in tab) || (tab.openerTabId === browser.tabs.TAB_ID_NONE)) {
              return;
          }
          // If we've already populated the opener tab cache for this tab with data from a more
          // detailed webNavigation.onCreatedNavigationTarget event, ignore this event
          if(openerTabCache.get(tab.id) !== undefined) {
              return;
          }
          openerTabCache.set(tab.id, {
              openerTabId: tab.openerTabId,
              timeStamp: timing.now()
          });
      });

      // When tabs.onRemoved fires, set a timeout to expire the tab-based transition information,
      // opener information, and any pending webNavigation.onCommitted details for that tab
      browser.tabs.onRemoved.addListener(tabId => {
          setTimeout(() => {
              pageVisitTabCache.delete(tabId);
              openerTabCache.delete(tabId);
              // If the tab closed after webNavigation.onCommitted but before
              // webNavigation.onDOMContentLoaded, the cached details would otherwise
              // never be removed
              webNavigationOnCommittedCache.delete(tabId);
          }, tabRemovedExpiry);
      });

      // When the event content script sends an update message, notify the relevant listeners
      messaging.onMessage.addListener((eventUpdateMessage, sender) => {
          // There should be a tab ID associated with the message, but might as well make certain
          // (same check as the click update handler below)
          if(!("tab" in sender) || !("id" in sender.tab)) {
              return;
          }
          for(const [listener, listenerRecord] of pageTransitionDataListeners) {
              // Skip listeners that did not opt in to private windows
              if(eventUpdateMessage.privateWindow && !listenerRecord.privateWindows) {
                  continue;
              }
              if(listenerRecord.matchPatternSet.matches(eventUpdateMessage.url)) {
                  listener({
                      pageId: eventUpdateMessage.pageId,
                      url: eventUpdateMessage.url,
                      referrer: eventUpdateMessage.referrer,
                      tabId: sender.tab.id,
                      isHistoryChange: eventUpdateMessage.isHistoryChange,
                      isOpenedTab: eventUpdateMessage.isOpenedTab,
                      openerTabId: eventUpdateMessage.openerTabId,
                      transitionType: eventUpdateMessage.transitionType,
                      // Give each listener its own copy of the qualifiers array
                      transitionQualifiers: eventUpdateMessage.transitionQualifiers.slice(),
                      tabSourcePageId: eventUpdateMessage.tabSourcePageId,
                      tabSourceUrl: eventUpdateMessage.tabSourceUrl,
                      tabSourceClick: eventUpdateMessage.tabSourceClick,
                      // Listeners that exclude private windows get the non-private time-based source
                      timeSourcePageId: listenerRecord.privateWindows ? eventUpdateMessage.timeSourcePageId : eventUpdateMessage.timeSourceNonPrivatePageId,
                      timeSourceUrl: listenerRecord.privateWindows ? eventUpdateMessage.timeSourceUrl : eventUpdateMessage.timeSourceNonPrivateUrl
                  });
              }
          }
      },
      {
          type: "webScience.pageTransition.contentScriptEventUpdate",
          schema: {
              pageId: "string",
              url: "string",
              referrer: "string",
              isHistoryChange: "boolean",
              isOpenedTab: "boolean",
              openerTabId: "number",
              transitionType: "string",
              transitionQualifiers: "object",
              tabSourcePageId: "string",
              tabSourceUrl: "string",
              tabSourceClick: "boolean",
              timeSourcePageId: "string",
              timeSourceUrl: "string",
              timeSourceNonPrivatePageId: "string",
              timeSourceNonPrivateUrl: "string",
              privateWindow: "boolean"
          }
      });

      // When the click content script sends an update message, update the tab-based transition cache
      messaging.onMessage.addListener((clickUpdateMessage, sender) => {
          // There should be a tab ID associated with the message, but might as well make certain
          if(!("tab" in sender) || !("id" in sender.tab)) {
              return;
          }
          // Update the cached link clicks for the page
          const cachedPageVisitsForTab = pageVisitTabCache.get(sender.tab.id);
          if((cachedPageVisitsForTab === undefined) || !(clickUpdateMessage.pageId in cachedPageVisitsForTab)) {
              return;
          }
          cachedPageVisitsForTab[clickUpdateMessage.pageId].clickTimeStamps = cachedPageVisitsForTab[clickUpdateMessage.pageId].clickTimeStamps.concat(clickUpdateMessage.clickTimeStamps);
      },
      {
          type: "webScience.pageTransition.contentScriptClickUpdate",
          schema: {
              pageId: "string",
              clickTimeStamps: "object"
          }
      });
  }
  524. /**
  525. * A map where keys are tab IDs and values are the most recent `webNavigation.onCommitted`
  526. * details, removed from the map when a subsequent `webNavigation.onDOMContentLoaded` fires
  527. * for the tab.
  528. * @constant {Map<number, Object>}
  529. * @private
  530. */
  531. const webNavigationOnCommittedCache = new Map();
  532. /**
  533. * A map, represented as an object, where keys are page IDs and values are objects with
  534. * `pageVisitStartTime`, `url`, and `privateWindow` properties from `pageManager.onPageVisitStart`.
  535. * We use an object so that it can be easily serialized. The reason we maintain this cache
  536. * is to account for possible race conditions between when pages load in the content script
  537. * environment and when the background script environment learns about page loads.
  538. * @constant {Object}
  539. * @private
  540. */
  541. const pageVisitTimeCache = { };
  542. /**
  543. * The maximum time, in milliseconds, to consider a page visit in any tab as a possible most
  544. * recent page visit in the content script environment, even though it's not the most recent
  545. * page visit in the background script environment.
  546. * @constant {number}
  547. * @private
  548. */
  549. const pageVisitTimeCacheExpiry = 1000;
  550. /**
  551. * @typedef {Object} PageVisitCachedDetails
  552. * @property {number} pageVisitStartTime - The page visit start time from `pageManager`.
  553. * @property {string} url - The URL from `pageManager`.
  554. * @property {number[]} clickTimeStamps - Timestamps for recent clicks on the page, from
  555. * the module's click content script.
  556. * @private
  557. */
  558. /**
  559. * A map where keys are tab IDs and values are maps, represented as objects, where keys
  560. * are page IDs and values are PageVisitCachedDetails objects.
  561. * @constant {Map<number, Object>}
  562. * @private
  563. */
  564. const pageVisitTabCache = new Map();
  565. /**
  566. * The maximum time, in milliseconds, to consider a page visit in a specific tab as a possible
  567. * most recent page visit for that tab in the content script environment, even though it's not
  568. * the most recent page visit for that tab in the background script environment.
  569. * @constant {number}
  570. * @private
  571. */
  572. const pageVisitTabCacheExpiry = 5000;
  573. /**
  574. * The maximum time, in milliseconds, to consider a click on a page as a possible most recent
  575. * click on the page in the content script environment, even though it's not the most recent
  576. * click in the background script environment. `sendUpdateToContentScript` removes older timestamps from each cached page's `clickTimeStamps`, except that the most recent click is kept when no newer clicks remain.
  577. * @constant {number}
  578. * @private
  579. */
  580. const clickCacheExpiry = 5000;
  581. /**
  582. * The minimum time, in milliseconds, to wait after a tab is removed before expiring the cache
  583. * of page visits in that tab for tab-based transition information and the cached opener tab
  584. * for that tab. NOTE(review): presumably these are the tab's entries in `pageVisitTabCache` and `openerTabCache`; the code that uses this constant is outside this excerpt, so confirm.
  585. * @constant {number}
  586. * @private
  587. */
  588. const tabRemovedExpiry = 10000;
  589. /**
  590. * A map where keys are tab IDs and values are objects with `openerTabId` and `timeStamp`
  591. * properties. `sendUpdateToContentScript` reads and then deletes a tab's entry when handling an update for that tab that is not a History API change.
  592. * @constant {Map<number, {openerTabId: number, timeStamp: number}>}
  593. * @private
  594. */
  595. const openerTabCache = new Map();
  /**
   * Relay background script transition details to the content script running on a page.
   * Called when a `webNavigation.onDOMContentLoaded` or `webNavigation.onHistoryStateUpdated`
   * event fires. After the message is sent, stale entries are removed from the time-based
   * cache (`pageVisitTimeCache`) and from the tab-based cache entry that was sent.
   * @param {Object} details - Details for the update to the content script.
   * @param {number} details.tabId - The tab ID for the tab where the page is loading.
   * @param {string} details.url - The URL for the page.
   * @param {number} details.timeStamp - The timestamp for the page that is loading, either from
   * `webNavigation.onDOMContentLoaded` or `webNavigation.onHistoryStateUpdated`, adjusted to
   * the shared monotonic clock.
   * @param {number} details.webNavigationTimeStamp - The timestamp for the page that is loading,
   * either from `webNavigation.onDOMContentLoaded` or `webNavigation.onHistoryStateUpdated`.
   * This timestamp, from the event, is on the system clock rather than the shared monotonic
   * clock.
   * @param {string} details.transitionType - The transition type for the page that is loading,
   * either from `webNavigation.onDOMContentLoaded` or `webNavigation.onHistoryStateUpdated`.
   * @param {string[]} details.transitionQualifiers - The transition qualifiers for the page
   * that is loading, either from `webNavigation.onDOMContentLoaded` or
   * `webNavigation.onHistoryStateUpdated`.
   * @param {boolean} details.isHistoryChange - Whether the update was caused by
   * `webNavigation.onDOMContentLoaded` (`false`) or `webNavigation.onHistoryStateUpdated`
   * (`true`).
   * @private
   */
  function sendUpdateToContentScript({
      tabId,
      url,
      timeStamp,
      webNavigationTimeStamp,
      transitionType,
      transitionQualifiers,
      isHistoryChange
  }) {
      // Defaults, which apply unchanged to a History API change: no cached page visits for
      // the tab and no opener tab
      let tabPageVisits = { };
      let isOpenedTab = false;
      let openerTabId = browser.tabs.TAB_ID_NONE;
      let tabOpeningTimeStamp = 0;

      if(!isHistoryChange) {
          // Start from the cached page visits for this tab (possibly undefined)
          tabPageVisits = pageVisitTabCache.get(tabId);
          // If the tab has cached opener details, consume them and use the opener tab's
          // cached page visits instead
          const openerDetails = openerTabCache.get(tabId);
          if(openerDetails !== undefined) {
              openerTabCache.delete(tabId);
              isOpenedTab = true;
              openerTabId = openerDetails.openerTabId;
              tabOpeningTimeStamp = openerDetails.timeStamp;
              tabPageVisits = pageVisitTabCache.get(openerTabId);
          }
      }

      // Hand the transition information to the content script, which will merge it with its
      // local information to generate a PageTransitionData event
      messaging.sendMessageToTab(tabId, {
          type: "webScience.pageTransition.backgroundScriptEventUpdate",
          url,
          timeStamp,
          webNavigationTimeStamp,
          transitionType,
          transitionQualifiers,
          isHistoryChange,
          pageVisitTimeCache,
          cachedPageVisitsForTab: (tabPageVisits === undefined) ? { } : tabPageVisits,
          isOpenedTab,
          openerTabId,
          tabOpeningTimeStamp
      });

      // Prune the time-based transition cache. Two entries survive even if stale: the most
      // recent page visit in any window, and the most recent page visit in a non-private
      // window (tracked separately because a listener might only be registered for
      // transitions involving non-private pages). Pruning happens after the message is sent,
      // for the reasons explained in the pageManager.onPageVisitStart listener.
      const now = timing.now();
      let newestPageId = "";
      let newestStartTime = 0;
      let newestNonPrivatePageId = "";
      let newestNonPrivateStartTime = 0;
      const staleTimeCachePageIds = [ ];
      for(const pageId in pageVisitTimeCache) {
          const { pageVisitStartTime, privateWindow } = pageVisitTimeCache[pageId];
          if(pageVisitStartTime > newestStartTime) {
              newestPageId = pageId;
              newestStartTime = pageVisitStartTime;
          }
          if(!privateWindow && (pageVisitStartTime > newestNonPrivateStartTime)) {
              newestNonPrivatePageId = pageId;
              newestNonPrivateStartTime = pageVisitStartTime;
          }
          if((now - pageVisitStartTime) > pageVisitTimeCacheExpiry) {
              staleTimeCachePageIds.push(pageId);
          }
      }
      for(const pageId of staleTimeCachePageIds) {
          if((pageId !== newestPageId) && (pageId !== newestNonPrivatePageId)) {
              delete pageVisitTimeCache[pageId];
          }
      }

      // Nothing was cached for the tab, so there is no tab-based cache entry to prune
      if(tabPageVisits === undefined) {
          return;
      }

      // Prune the tab-based transition cache. Private and non-private windows need no separate
      // handling here, because if a tab precedes another tab they have the same private
      // window status. Stale pages are removed, except for the most recent page if it is
      // also stale.
      let newestTabPageId = "";
      let newestTabStartTime = 0;
      const staleTabPageIds = [ ];
      for(const pageId in tabPageVisits) {
          const { pageVisitStartTime } = tabPageVisits[pageId];
          if(pageVisitStartTime > newestTabStartTime) {
              newestTabPageId = pageId;
              newestTabStartTime = pageVisitStartTime;
          }
          if((now - pageVisitStartTime) > pageVisitTabCacheExpiry) {
              staleTabPageIds.push(pageId);
          }
      }
      for(const pageId of staleTabPageIds) {
          if(pageId !== newestTabPageId) {
              delete tabPageVisits[pageId];
          }
      }

      // On the remaining pages, drop stale clicks, except for the most recent click if it is
      // also stale
      for(const pageId in tabPageVisits) {
          const pageDetails = tabPageVisits[pageId];
          const allClicks = Array.from(pageDetails.clickTimeStamps);
          const retainedClicks = allClicks.filter((clickTimeStamp) => (now - clickTimeStamp) <= clickCacheExpiry);
          const latestClick = allClicks.reduce((latest, clickTimeStamp) => Math.max(latest, clickTimeStamp), 0);
          if((retainedClicks.length === 0) && (latestClick > 0)) {
              retainedClicks.push(latestClick);
          }
          pageDetails.clickTimeStamps = retainedClicks;
      }
  }