Source: linkExposure.js

/**
 * This module enables measuring user exposure to linked content. See the
 * `onLinkExposureData` and `onLinkExposureUpdate` events for specifics.
 * There is an important difference between these events: `onLinkExposureData`
 * fires once per page with a complete set of link exposure data, while
 * `onLinkExposureUpdate` fires throughout a page's lifespan as link exposures
 * occur. For most use cases, `onLinkExposureData` is the right event to use.
 *
 * @module linkExposure
 */

import * as events from "./events.js";
import * as linkResolution from "./linkResolution.js";
import * as matching from "./matching.js";
import * as messaging from "./messaging.js";
import * as pageManager from "./pageManager.js";
import * as permissions from "./permissions.js";
import linkExposureContentScript from "include:./content-scripts/linkExposure.content.js";

/**
 * Whether to ignore links where the link URL's public suffix + 1 (PS+1) is
 * identical to the page URL's PS+1. When `true`, a link whose resolved URL has
 * the same PS+1 as the page is left out of link exposure updates entirely: it is
 * neither reported as a matching URL nor included in the nonmatching count.
 * Note that there is another ignoreSelfLinks constant in the linkExposure
 * content script, and these two constants should have the same value.
 * @constant {boolean}
 * @private
 */
const ignoreSelfLinks = true;

/**
 * The details of a link exposure update event. This private type must be kept in
 * sync with the public `linkExposureUpdateListener` type.
 * @typedef {Object} LinkExposureUpdateDetails
 * @property {string} pageId - The ID for the page, unique across browsing sessions.
 * @property {string} url - The URL of the page, without any hash.
 * @property {string[]} matchingLinkUrls - An array containing the resolved URLs of links
 * on the page that the user was exposed to and that matched a provided match pattern.
 * @property {number} nonmatchingLinkCount - The number of resolved links on the page that
 * the user was exposed to and that did not match a provided match pattern.
 * @private
 */

/**
 * A listener for the `onLinkExposureUpdate` event.
 * @callback linkExposureUpdateListener
 * @memberof module:linkExposure.onLinkExposureUpdate
 * @param {Object} details - Additional information about the link
 * exposure update event.
 * @param {string} details.pageId - The ID for the page, unique across browsing sessions.
 * @param {string} details.url - The URL of the page, without any hash.
 * @param {string[]} details.matchingLinkUrls - An array containing the resolved URLs of links
 * on the page that the user was exposed to and that matched a provided match pattern.
 * @param {number} details.nonmatchingLinkCount - The number of resolved links on the page that
 * the user was exposed to and that did not match a provided match pattern.
 */

/**
 * Options when adding a listener for the `onLinkExposureUpdate` event. This
 * private type must be kept in sync with the public `onLinkExposureUpdate.addListener`
 * type.
 * @typedef {Object} LinkExposureUpdateOptions
 * @property {string[]} linkMatchPatterns - Match patterns for links where the listener
 * should receive individual resolved URLs. Links that do not match this match pattern are
 * included in an aggregate count.
 * @property {string[]} pageMatchPatterns - Match patterns for pages where the listener
 * should be provided link exposure data.
 * @property {boolean} [privateWindows=false] - Whether to measure links in private windows.
 * @private
 */

/**
 * @typedef {Object} LinkExposureUpdateListenerRecord
 * @property {matching.MatchPatternSet} linkMatchPatternSet - The match patterns for link URLs.
 * @property {matching.MatchPatternSet} pageMatchPatternSet - The match patterns for pages.
 * @property {boolean} privateWindows - Whether to report exposures in private windows.
 * @property {browser.contentScripts.RegisteredContentScript} contentScript - The content
 * script associated with the listener.
 * @private
 */

/**
 * A map where each key is an `onLinkExposureUpdate` listener and each value is a
 * record for that listener. Records are stored by `addUpdateListener` and deleted
 * by `removeUpdateListener`.
 * @constant {Map<linkExposureUpdateListener, LinkExposureUpdateListenerRecord>}
 * @private
 */
const linkExposureUpdateListeners = new Map();

/**
 * A map where each key is a page ID and each value is a count of pending page link exposure updates
 * waiting on link resolution. `messageListener` increments the count when an update message arrives
 * and decrements it when processing of that update finishes. A page ID with no pending updates has
 * no entry in the map (the entry is deleted rather than set to zero).
 * @constant {Map<string, number>}
 * @private
 */
const pendingPageLinkExposureUpdates = new Map();

/**
 * A map where each key is a page ID and each value is a callback function that is fired when there
 * are no more pending link exposure updates for the page ID. A callback is set by the
 * `onPageVisitStop` handler in `addDataListener` when a page visit has ended but updates are still
 * pending, and it is called and removed by `messageListener`.
 * @constant {Map<string, Function>}
 * @private
 */
const pendingPageLinkExposureCallbacks = new Map();

/**
 * Add a listener for the `onLinkExposureUpdate` event.
 * @function addListener
 * @memberof module:linkExposure.onLinkExposureUpdate
 * @param {linkExposureUpdateListener} listener - The listener to add.
 * @param {Object} options - Options for the listener.
 * @param {string[]} options.linkMatchPatterns - Match patterns for links where the listener
 * should receive individual resolved URLs. Links that do not match this match pattern are
 * included in an aggregate count.
 * @param {string[]} options.pageMatchPatterns - Match patterns for pages where the listener
 * should be provided link exposure data.
 * @param {boolean} [options.privateWindows=false] - Whether to measure links in private windows.
 */

/**
 * Remove a listener for the `onLinkExposureUpdate` event.
 * @function removeListener
 * @memberof module:linkExposure.onLinkExposureUpdate
 * @param {linkExposureUpdateListener} listener - The listener to remove.
 */

/**
 * Whether a specified listener has been added for the `onLinkExposureUpdate` event.
 * @function hasListener
 * @memberof module:linkExposure.onLinkExposureUpdate
 * @param {linkExposureUpdateListener} listener - The listener to check.
 * @returns {boolean} Whether the listener has been added for the event.
 */

/**
 * Whether the `onLinkExposureUpdate` event has any listeners.
 * @function hasAnyListeners
 * @memberof module:linkExposure.onLinkExposureUpdate
 * @returns {boolean} Whether the event has any listeners.
 */

/**
 * An event that fires when data about link exposures on a page is available. This event can fire multiple
 * times for one page, as link exposures occur and the URLs for those links are resolved. A listener is
 * only called for pages that match its page match patterns and, for pages in private windows, only if
 * it opted in with the `privateWindows` option.
 * @namespace
 */
export const onLinkExposureUpdate = events.createEvent({
    name: "webScience.linkExposure.onLinkExposureUpdate",
    addListenerCallback: addUpdateListener,
    removeListenerCallback: removeUpdateListener,
    // NOTE(review): returning false presumably tells the events module not to call
    // listeners itself; this module calls update listeners directly from
    // messageListener. Confirm against events.js.
    notifyListenersCallback: () => { return false; }
});

/**
 * Whether the module has been initialized by checking permissions and adding a
 * messaging.onMessage listener. Set to `true` the first time `addUpdateListener`
 * runs, so that this one-time setup is not repeated for later listeners.
 * @type {boolean}
 * @private
 */
let initialized = false;

/**
 * Callback for adding an onLinkExposureUpdate listener. On first use this performs
 * the module's one-time setup (permissions check and registration of the message
 * listener for content script updates). It then compiles the listener's match
 * patterns, registers the linkExposure content script for the page match patterns,
 * and stores a record for the listener.
 * @param {linkExposureUpdateListener} listener - The listener.
 * @param {LinkExposureUpdateOptions} options - A set of options for the listener.
 * @returns {Promise<void>} A Promise that resolves once the content script has been
 * registered and the listener record has been stored.
 * @private
 */
async function addUpdateListener(listener, { linkMatchPatterns, pageMatchPatterns, privateWindows = false }) {
    // Initialization
    await pageManager.initialize();
    if(!initialized) {
        // Set the flag before doing the setup work so the setup cannot run twice
        initialized = true;

        permissions.check({
            module: "webScience.linkExposure",
            requiredPermissions: [ "storage" ],
            suggestedPermissions: [ "unlimitedStorage" ]
        });

        messaging.onMessage.addListener(messageListener, {
            type: "webScience.linkExposure.linkExposureUpdate",
            schema: {
                pageId: "string",
                url: "string",
                privateWindow: "boolean",
                linkUrls: "object"
            }
        });
    }

    // Compile the match patterns for link URLs and page URLs
    const linkMatchPatternSet = matching.createMatchPatternSet(linkMatchPatterns);
    const pageMatchPatternSet = matching.createMatchPatternSet(pageMatchPatterns);

    // Register a content script for the page URLs
    const contentScript = await browser.contentScripts.register({
        matches: pageMatchPatterns,
        js: [{
            file: linkExposureContentScript
        }],
        runAt: "document_idle"
    });

    // If the listener already has a record (i.e., the same listener was added again,
    // possibly with different options), the record stored below replaces it. Unregister
    // the replaced record's content script, because otherwise it would stay registered
    // with no record left from which to unregister it. This is done only after the new
    // registration has succeeded, so a failed re-add leaves the existing record intact.
    const replacedListenerRecord = linkExposureUpdateListeners.get(listener);
    if(replacedListenerRecord !== undefined) {
        replacedListenerRecord.contentScript.unregister();
    }

    // Store the listener information in a record
    linkExposureUpdateListeners.set(listener, {
        linkMatchPatternSet,
        pageMatchPatternSet,
        privateWindows,
        contentScript
    });
}

/**
 * Callback for removing an onLinkExposureUpdate listener. Unregisters the content
 * script that was registered for the listener and forgets the listener's record.
 * Does nothing if the listener has no record.
 * @param {linkExposureUpdateListener} listener - The listener that is being removed.
 * @private
 */
function removeUpdateListener(listener) {
    const record = linkExposureUpdateListeners.get(listener);
    // No record means there is no content script to clean up
    if(record === undefined) {
        return;
    }

    // Unregister the listener's content script, then drop its record
    const { contentScript } = record;
    contentScript.unregister();
    linkExposureUpdateListeners.delete(listener);
}

/**
 * Callback for a link exposure update message from the content script. Resolves the
 * link URLs in the message, notifies each onLinkExposureUpdate listener whose page
 * match patterns and private window setting apply, and keeps the per-page count of
 * pending updates current. When the last pending update for a page finishes, any
 * callback waiting on that page in `pendingPageLinkExposureCallbacks` is called.
 *
 * The pending count bookkeeping runs even if processing fails (e.g., a listener
 * throws), so a failure cannot leave a page permanently marked as having pending
 * updates. The failure itself still surfaces as a rejection of the internal Promise.
 * @param {Object} linkExposureUpdate - The update message.
 * @param {string} linkExposureUpdate.pageId - The page ID for the page where
 * the content script is running.
 * @param {string} linkExposureUpdate.url - The URL, without a hash, for the page
 * where the content script is running.
 * @param {boolean} linkExposureUpdate.privateWindow - Whether the page where the
 * content script is running is in a private window.
 * @param {string[]} linkExposureUpdate.linkUrls - The links on the page that the
 * user was exposed to. This array is not modified.
 * @private
 */
function messageListener({ pageId, url, privateWindow, linkUrls }) {
    // Start resolving all the link URLs in the update, converting each URL into a
    // Promise<string>. This is done before incrementing the pending count so that a
    // synchronous failure here cannot leave the count incremented forever.
    const resolvedLinkUrlPromises = linkUrls.map(linkUrl => linkResolution.resolveUrl(linkUrl));

    // Increment the count of pending link exposure updates for the page
    const priorPendingCount = pendingPageLinkExposureUpdates.get(pageId);
    pendingPageLinkExposureUpdates.set(pageId, priorPendingCount === undefined ? 1 : priorPendingCount + 1);

    // Once resolution is complete, notify the linkExposureUpdate listeners
    Promise.allSettled(resolvedLinkUrlPromises).then(async (results) => {
        try {
            // For each link URL, if we have a resolved URL, use that
            // If we don't have a resolved URL, use the original URL with
            // cache, shim, and link decoration parsing
            // The results go into a new array so the message's array is left untouched
            const resolvedLinkUrls = [];
            for(const [i, result] of results.entries()) {
                if(result.status === "fulfilled") {
                    resolvedLinkUrls.push(result.value);
                }
                else {
                    resolvedLinkUrls.push(await linkResolution.resolveUrl(linkUrls[i], { request: "none" }));
                }
            }

            // If we are ignoring self links, determine whether each link URL is a self link
            // by comparing to the page URL's public suffix + 1
            // These are links that do not appear to be self links in the content
            // script, but resolve to self links
            let selfLinks = null;
            if(ignoreSelfLinks) {
                const pagePS1 = linkResolution.urlToPS1(url);
                selfLinks = resolvedLinkUrls.map(linkUrl => pagePS1 === linkResolution.urlToPS1(linkUrl));
            }

            // Notify the listeners
            for(const [listener, listenerRecord] of linkExposureUpdateListeners) {
                // Check private window and page match pattern requirements for the listener
                if((privateWindow && !listenerRecord.privateWindows) ||
                !listenerRecord.pageMatchPatternSet.matches(url)) {
                    continue;
                }
                const matchingLinkUrls = [];
                let nonmatchingLinkCount = 0;
                for(const [i, linkUrl] of resolvedLinkUrls.entries()) {
                    // If we are ignoring self links and a resolved link URL is a self link,
                    // ignore the resolved link URL
                    if(ignoreSelfLinks && selfLinks[i]) {
                        continue;
                    }
                    // Queue the link for reporting to the listener, either as a URL (if matching)
                    // or in a count (if nonmatching)
                    if(listenerRecord.linkMatchPatternSet.matches(linkUrl)) {
                        matchingLinkUrls.push(linkUrl);
                    }
                    else {
                        nonmatchingLinkCount++;
                    }
                }
                listener({
                    pageId,
                    url,
                    matchingLinkUrls,
                    nonmatchingLinkCount
                });
            }
        }
        finally {
            // Decrement the count of pending link exposure updates for the page. This is in
            // a finally block because the onLinkExposureData event waits on this count, and
            // it would wait forever if an error above skipped the decrement.
            const remainingPendingCount = pendingPageLinkExposureUpdates.get(pageId) - 1;
            if(remainingPendingCount > 0) {
                pendingPageLinkExposureUpdates.set(pageId, remainingPendingCount);
            }
            else {
                pendingPageLinkExposureUpdates.delete(pageId);
                // There are no more pending link exposures for the page, so if there's a
                // callback waiting for that, remove it and call it. The callback is removed
                // first so that it is not left behind if it throws.
                const callback = pendingPageLinkExposureCallbacks.get(pageId);
                pendingPageLinkExposureCallbacks.delete(pageId);
                if(callback !== undefined) {
                    callback();
                }
            }
        }
    });
}

/**
 * The details of a link exposure data event. This private type must be kept in sync with
 * the public `linkExposureDataListener` type.
 * @typedef {Object} LinkExposureDataDetails
 * @property {string} pageId - The ID for the page, unique across browsing sessions.
 * @property {string} url - The URL of the page, without any hash.
 * @property {string[]} matchingLinkUrls - An array containing the resolved URLs of links
 * on the page that the user was exposed to and that matched a provided match pattern.
 * @property {number} nonmatchingLinkCount - The number of resolved links on the page that
 * the user was exposed to and that did not match a provided match pattern.
 * @private
 */

/**
 * A callback function for the link exposure data event.
 * @callback linkExposureDataListener
 * @memberof module:linkExposure.onLinkExposureData
 * @param {Object} details - Additional information about the link exposure data event.
 * @param {string} details.pageId - The ID for the page, unique across browsing sessions.
 * @param {string} details.url - The URL of the page, without any hash.
 * @param {string[]} details.matchingLinkUrls - An array containing the resolved URLs of links
 * on the page that the user was exposed to and that matched a provided match pattern.
 * @param {number} details.nonmatchingLinkCount - The number of resolved links on the page that
 * the user was exposed to and that did not match a provided match pattern.
 */

/**
 * Options when adding a listener for the `onLinkExposureData` event. This private type must
 * be kept in sync with the public `onLinkExposureData.addListener` type.
 * @typedef {Object} LinkExposureDataOptions
 * @property {string[]} linkMatchPatterns - Match patterns for links where the listener
 * should receive individual resolved URLs. Links that do not match this match pattern are
 * included in an aggregate count.
 * @property {string[]} pageMatchPatterns - Match patterns for pages where the listener
 * should be provided link exposure data.
 * @property {boolean} [privateWindows=false] - Whether to measure links in private windows.
 * @private
 */

/**
 * @typedef {Object} LinkExposureDataListenerRecord
 * @property {linkExposureUpdateListener} linkExposureUpdateListener - The listener for onLinkExposureUpdate
 * that was created for this onLinkExposureData listener.
 * @property {Map<string,LinkExposureDataDetails>} pageLinkExposureData - A map where keys are page IDs and values
 * are LinkExposureDataDetails reflecting partial link exposure data for a page.
 * @private
 */

/**
 * A map where each key is an `onLinkExposureData` listener and each value is a record
 * for that listener. Records are stored by `addDataListener` and deleted by
 * `removeDataListener`.
 * @constant {Map<linkExposureDataListener, LinkExposureDataListenerRecord>}
 * @private
 */
const linkExposureDataListeners = new Map();

/**
 * Add a listener for the `onLinkExposureData` event.
 * @function addListener
 * @memberof module:linkExposure.onLinkExposureData
 * @param {linkExposureDataListener} listener - The listener to add.
 * @param {Object} options - Options for the listener.
 * @param {string[]} options.linkMatchPatterns - Match patterns for links where the listener
 * should receive individual resolved URLs. Links that do not match this match pattern are
 * included in an aggregate count.
 * @param {string[]} options.pageMatchPatterns - Match patterns for pages where the listener
 * should be provided link exposure data.
 * @param {boolean} [options.privateWindows=false] - Whether to measure links in private windows.
 */

/**
 * Remove a listener for the `onLinkExposureData` event.
 * @function removeListener
 * @memberof module:linkExposure.onLinkExposureData
 * @param {linkExposureDataListener} listener - The listener to remove.
 */

/**
 * Whether a specified listener has been added for the `onLinkExposureData` event.
 * @function hasListener
 * @memberof module:linkExposure.onLinkExposureData
 * @param {linkExposureDataListener} listener - The listener to check.
 * @returns {boolean} Whether the listener has been added for the event.
 */

/**
 * Whether the `onLinkExposureData` event has any listeners.
 * @function hasAnyListeners
 * @memberof module:linkExposure.onLinkExposureData
 * @returns {boolean} Whether the event has any listeners.
 */

/**
 * Whether the pageManager.onPageVisitStart and pageManager.onPageVisitStop listeners have been added.
 * Set to `true` the first time `addDataListener` runs, so that the page visit listeners are added
 * once and shared by all onLinkExposureData listeners.
 * @type {boolean}
 * @private
 */
let addedPageVisitListeners = false;
 
/**
 * An event that fires when a complete set of data about link exposures on a page is available. This event
 * only fires once per page, after the page visit has ended, and only if there was at least one link
 * exposure (matching or nonmatching) to report for the page.
 * @namespace
 */
export const onLinkExposureData = events.createEvent({
    name: "webScience.linkExposure.onLinkExposureData",
    addListenerCallback: addDataListener,
    removeListenerCallback: removeDataListener,
    // NOTE(review): returning false presumably tells the events module not to call
    // listeners itself; this module calls data listeners directly from the
    // onPageVisitStop handling in addDataListener. Confirm against events.js.
    notifyListenersCallback: () => { return false; }
});

/**
 * A short period of time to wait, in milliseconds, after the onPageVisitStop event before attempting the
 * onLinkExposureData event. We need to wait a short period because there can be lingering
 * onLinkExposureUpdate events after onPageVisitStop (e.g., links that are still getting resolved or a
 * final message from the linkExposure content script when the page visit ends). If link exposure updates
 * are still pending for the page when the delay elapses, the onLinkExposureData notification is deferred
 * further, until those pending updates have finished.
 * @constant {number}
 * @private
 */
const pageVisitStopDelay = 500;

/**
 * Callback for adding an onLinkExposureData listener. The first call also adds the shared
 * pageManager.onPageVisitStart and pageManager.onPageVisitStop listeners. Each
 * onLinkExposureData listener is backed by its own onLinkExposureUpdate listener, which
 * accumulates link exposures per page until the page visit ends.
 * @param {linkExposureDataListener} listener - The listener.
 * @param {LinkExposureDataOptions} options - A set of options for the listener. These are
 * passed unchanged to the backing onLinkExposureUpdate listener.
 * @returns {Promise<void>}
 * @private
 */
async function addDataListener(listener, options) {
    if(!addedPageVisitListeners) {
        // When a page visit starts, for each link exposure data listener with a matching page match pattern,
        // create an object to accumulate link exposures on that page
        pageManager.onPageVisitStart.addListener(pageVisitStartDetails => {
            for(const linkExposureDataListenerRecord of linkExposureDataListeners.values()) {
                const linkExposureUpdateListenerRecord = linkExposureUpdateListeners.get(linkExposureDataListenerRecord.linkExposureUpdateListener);
                // The onLinkExposureUpdate listener record is stored asynchronously (after the content
                // script is registered), while the onLinkExposureData listener record is stored right
                // away. A page visit can therefore start while the update listener record does not exist
                // yet (or it may never exist, if adding the update listener failed). Skip such a listener
                // rather than throwing, which would also abort the loop for every other listener.
                if(linkExposureUpdateListenerRecord === undefined) {
                    continue;
                }
                if(linkExposureUpdateListenerRecord.pageMatchPatternSet.matches(pageVisitStartDetails.url)) {
                    linkExposureDataListenerRecord.pageLinkExposureData.set(pageVisitStartDetails.pageId, {
                        pageId: pageVisitStartDetails.pageId,
                        url: pageVisitStartDetails.url,
                        matchingLinkUrls: [],
                        nonmatchingLinkCount: 0
                    });
                }
            }
        });

        // When a page visit ends, wait a short period because link resolution might still be pending
        pageManager.onPageVisitStop.addListener(pageVisitStopDetails => {
            setTimeout(() => {
                // Create a callback function to notify onLinkExposureData listeners about the link exposures
                // on the page and delete the store of aggregated link exposures
                const notifyListeners = () => {
                    for(const [linkExposureDataListener, linkExposureDataListenerRecord] of linkExposureDataListeners) {
                        const linkExposureDataForPage = linkExposureDataListenerRecord.pageLinkExposureData.get(pageVisitStopDetails.pageId);
                        // If there's at least one link exposure to report on the page, notify the listener
                        if(linkExposureDataForPage !== undefined) {
                            if((linkExposureDataForPage.matchingLinkUrls.length > 0) || (linkExposureDataForPage.nonmatchingLinkCount > 0)) {
                                linkExposureDataListener(linkExposureDataForPage);
                            }
                            // Delete the listener's accumulated link exposure data for the page
                            linkExposureDataListenerRecord.pageLinkExposureData.delete(pageVisitStopDetails.pageId);
                        }
                    }
                };
                // If there are no pending link exposure updates for the page, immediately call the callback function
                if(!pendingPageLinkExposureUpdates.has(pageVisitStopDetails.pageId)) {
                    notifyListeners();
                }
                // Otherwise, set the callback function to be called when there are no more pending link exposures for
                // the page
                else {
                    pendingPageLinkExposureCallbacks.set(pageVisitStopDetails.pageId, notifyListeners);
                }
            }, pageVisitStopDelay);
        });
        addedPageVisitListeners = true;
    }

    // Create a record of the onLinkExposureData listener, including a new onLinkExposureUpdate listener
    const linkExposureDataListenerRecord = {
        pageLinkExposureData: new Map(),
        // When the onLinkExposureUpdate listener fires for this onLinkExposureData listener, accumulate
        // the link exposures on the page for this listener. Updates for pages without an accumulator
        // (i.e., pages whose visit start was not seen or did not match) are ignored.
        linkExposureUpdateListener: linkExposureUpdateDetails => {
            const linkExposureDataForPage = linkExposureDataListenerRecord.pageLinkExposureData.get(linkExposureUpdateDetails.pageId);
            if(linkExposureDataForPage !== undefined) {
                linkExposureDataForPage.matchingLinkUrls = linkExposureDataForPage.matchingLinkUrls.concat(linkExposureUpdateDetails.matchingLinkUrls);
                linkExposureDataForPage.nonmatchingLinkCount += linkExposureUpdateDetails.nonmatchingLinkCount;
            }
        }
    };
    linkExposureDataListeners.set(listener, linkExposureDataListenerRecord);
    onLinkExposureUpdate.addListener(linkExposureDataListenerRecord.linkExposureUpdateListener, options);
}

/**
 * Callback for removing an onLinkExposureData listener. Removes the onLinkExposureUpdate
 * listener that backs the onLinkExposureData listener and forgets the listener's record.
 * Does nothing if the listener has no record.
 * @param {linkExposureDataListener} listener - The listener that is being removed.
 * @private
 */
function removeDataListener(listener) {
    const record = linkExposureDataListeners.get(listener);
    // No record means there is no backing onLinkExposureUpdate listener to remove
    if(record === undefined) {
        return;
    }

    // Remove the backing onLinkExposureUpdate listener, then drop the record
    const { linkExposureUpdateListener } = record;
    onLinkExposureUpdate.removeListener(linkExposureUpdateListener);
    linkExposureDataListeners.delete(listener);
}