Run visual metrics processing within the test task.

Branch: pull/543/head
Authored by gmierz 2 years ago; committed by mergify[bot]
Parent: 5597ed6e98
Commit: 9ed5e55318

@@ -37,7 +37,7 @@ job-defaults:
subject: '[{product_name}] Raptor-Browsertime job "{task_name}" failed'
to-addresses: [perftest-alerts@mozilla.com]
default: {}
run-on-tasks-for: []
run-on-tasks-for: [github-pull-request]
treeherder:
kind: test
tier: 2
@@ -98,7 +98,7 @@ job-defaults:
- linux64-ffmpeg-4.1.4
- linux64-geckodriver
- linux64-minidump-stackwalk
- linux64-node
- linux64-node-16
jobs:
tp6m:

@@ -22,6 +22,3 @@ jobs:
ui-tests:
parent: base
symbol: I(ui-tests)
visual-metrics:
parent: base
symbol: I(visual-metrics)

@@ -49,10 +49,10 @@ linux64-node:
index-search:
- gecko.cache.level-3.toolchains.v3.linux64-node-12.latest
visual-metrics:
linux64-node-16:
attributes:
toolchain-artifact: public/visualmetrics.py
description: "Browsertime visual metrics analysis script"
toolchain-artifact: public/build/node.tar.zst
description: "Node.js toolchain"
run:
index-search:
- gecko.cache.level-3.content.v1.visual-metrics.latest
- gecko.cache.level-3.toolchains.v3.linux64-node-16.latest

@@ -1,51 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
---
loader: fenix_taskgraph.loader.multi_dep:loader
kind-dependencies:
- browsertime
- toolchain
primary-dependency:
- browsertime
group-by: attributes
only-for-attributes:
- run-visual-metrics
transforms:
- fenix_taskgraph.transforms.visual_metrics:transforms
- taskgraph.transforms.job:transforms
- taskgraph.transforms.task:transforms
job-template:
attributes:
nightly: true
description: "Run visual metrics calculations on Raptor"
run-on-projects: []
run-on-tasks-for: []
worker-type: b-android
treeherder:
tier: 2
kind: other
worker:
docker-image: {in-tree: visual-metrics}
max-run-time: 900
artifacts:
- type: file
name: public/perfherder-data.json
path: /builds/worker/artifacts/perfherder-data.json
- type: file
name: public/summary.json
path: /builds/worker/artifacts/summary.json
fetches:
toolchain:
- visual-metrics
run:
using: run-task
command: /builds/worker/bin/run-visual-metrics.py -- --orange --perceptual --contentful --force --renderignore 5 --json --viewport
checkout: false
run-as-root: true
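These options after the `--` were forwarded verbatim to visualmetrics.py for every captured video. Below is a sketch of the resulting per-video invocation, based on the command construction in run-visual-metrics.py further down; the video path is hypothetical:

# Sketch only: the final argument list built for one video, assuming the
# options shown above and a made-up video path.
cmd = [
    "/usr/bin/python",
    "visualmetrics.py",
    "-vvv",
    "--logformat", "[%(levelname)s] - %(message)s",
    "--video", "browsertime-results/amazon/1/video.mp4",
    "--orange", "--perceptual", "--contentful", "--force",
    "--renderignore", "5", "--json", "--viewport",
]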

@@ -1,30 +0,0 @@
FROM $DOCKER_IMAGE_PARENT
MAINTAINER Gregory Mierzwinski <gmierzwinski@mozilla.com>
# run-task expects to run as root
USER root
RUN apt-get update -qq && \
apt-get install -y \
ffmpeg \
imagemagick \
pyssim \
python \
python-pil
WORKDIR /builds/worker
USER worker:worker
COPY requirements.txt /builds/worker/requirements.txt
RUN pip3 install --require-hashes -r /builds/worker/requirements.txt && \
rm /builds/worker/requirements.txt
COPY similarity.py /builds/worker/bin/similarity.py
COPY run-visual-metrics.py /builds/worker/bin/run-visual-metrics.py
COPY performance-artifact-schema.json /builds/worker/performance-artifact-schema.json
USER root
RUN chmod +x /builds/worker/bin/run-visual-metrics.py
VOLUME /builds/worker/artifacts/

@@ -1,230 +0,0 @@
{
"definitions": {
"application_schema": {
"properties": {
"name": {
"title": "Application under performance test",
"enum": [
"firefox",
"chrome",
"chrome-m",
"chromium",
"fennec",
"geckoview",
"refbrow",
"fenix"
],
"maxLength": 10,
"type": "string"
},
"version": {
"title": "Application's version",
"maxLength": 40,
"type": "string"
}
},
"required": ["name"],
"type": "object"
},
"framework_schema": {
"properties": {
"name": {
"title": "Framework name",
"type": "string"
}
},
"type": "object"
},
"subtest_schema": {
"properties": {
"name": {
"title": "Subtest name",
"type": "string"
},
"publicName": {
"title": "Public subtest name",
"description": "Allows renaming test's name, without breaking existing performance data series",
"maxLength": 30,
"type": "string"
},
"value": {
"description": "Summary value for subtest",
"title": "Subtest value",
"type": "number",
"minimum": -1000000000000.0,
"maximum": 1000000000000.0
},
"unit": {
"title": "Measurement unit",
"type": "string",
"minLength": 1,
"maxLength": 20
},
"lowerIsBetter": {
"description": "Whether lower values are better for subtest",
"title": "Lower is better",
"type": "boolean"
},
"shouldAlert": {
"description": "Whether we should alert",
"title": "Should alert",
"type": "boolean"
},
"alertThreshold": {
"description": "% change threshold before alerting",
"title": "Alert threshold",
"type": "number",
"minimum": 0.0,
"maximum": 1000.0
},
"minBackWindow": {
"description": "Minimum back window to use for alerting",
"title": "Minimum back window",
"type": "number",
"minimum": 1,
"maximum": 255
},
"maxBackWindow": {
"description": "Maximum back window to use for alerting",
"title": "Maximum back window",
"type": "number",
"minimum": 1,
"maximum": 255
},
"foreWindow": {
"description": "Fore window to use for alerting",
"title": "Fore window",
"type": "number",
"minimum": 1,
"maximum": 255
}
},
"required": [
"name",
"value"
],
"type": "object"
},
"suite_schema": {
"properties": {
"name": {
"title": "Suite name",
"type": "string"
},
"publicName": {
"title": "Public suite name",
"description": "Allows renaming suite's name, without breaking existing performance data series",
"maxLength": 30,
"type": "string"
},
"tags": {
"type": "array",
"title": "Free form tags, which ease the grouping & searching of performance tests",
"description": "Similar to extraOptions, except it does not break existing performance data series",
"items": {
"type": "string",
"pattern": "^[a-zA-Z0-9-]{1,24}$"
},
"uniqueItems": true,
"maxItems": 14
},
"extraOptions": {
"type": "array",
"title": "Extra options used in running suite",
"items": {
"type": "string",
"maxLength": 100
},
"uniqueItems": true,
"maxItems": 8
},
"subtests": {
"items": {
"$ref": "#/definitions/subtest_schema"
},
"title": "Subtests",
"type": "array"
},
"value": {
"title": "Suite value",
"type": "number",
"minimum": -1000000000000.0,
"maximum": 1000000000000.0
},
"unit": {
"title": "Measurement unit",
"type": "string",
"minLength": 1,
"maxLength": 20
},
"lowerIsBetter": {
"description": "Whether lower values are better for suite",
"title": "Lower is better",
"type": "boolean"
},
"shouldAlert": {
"description": "Whether we should alert on this suite (overrides default behaviour)",
"title": "Should alert",
"type": "boolean"
},
"alertThreshold": {
"description": "% change threshold before alerting",
"title": "Alert threshold",
"type": "number",
"minimum": 0.0,
"maximum": 1000.0
},
"minBackWindow": {
"description": "Minimum back window to use for alerting",
"title": "Minimum back window",
"type": "integer",
"minimum": 1,
"maximum": 255
},
"maxBackWindow": {
"description": "Maximum back window to use for alerting",
"title": "Maximum back window",
"type": "integer",
"minimum": 1,
"maximum": 255
},
"foreWindow": {
"description": "Fore window to use for alerting",
"title": "Fore window",
"type": "integer",
"minimum": 1,
"maximum": 255
}
},
"required": [
"name",
"subtests"
],
"type": "object"
}
},
"description": "Structure for submitting performance data as part of a job",
"id": "https://treeherder.mozilla.org/schemas/v1/performance-artifact.json#",
"properties": {
"application":{
"$ref": "#/definitions/application_schema"
},
"framework": {
"$ref": "#/definitions/framework_schema"
},
"suites": {
"description": "List of suite-level data submitted as part of this structure",
"items": {
"$ref": "#/definitions/suite_schema"
},
"title": "Performance suites",
"type": "array"
}
},
"required": [
"framework",
"suites"
],
"title": "Perfherder Schema",
"type": "object"
}
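For orientation, a minimal payload that satisfies this schema could look like the sketch below; the application name, version, suite, and metric values are illustrative rather than taken from a real run:

import json

from jsonschema import validate

# Hypothetical, minimal Perfherder payload. Only "framework" and "suites" are
# required at the top level, and each suite needs "name" and "subtests".
perf_data = {
    "framework": {"name": "browsertime"},
    "application": {"name": "fenix", "version": "110.0"},
    "suites": [
        {
            "name": "amazon",
            "subtests": [
                {
                    "name": "SpeedIndex",
                    "value": 810,
                    "unit": "ms",
                    "lowerIsBetter": True,
                    "shouldAlert": True,
                }
            ],
        }
    ],
}

with open("performance-artifact-schema.json") as f:
    validate(perf_data, json.load(f))  # raises ValidationError if the payload is invalid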

@@ -1,23 +0,0 @@
# Dependency hashes must be for python3.6
# Direct dependencies
attrs==19.1.0 --hash=sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79
structlog==19.1.0 --hash=sha256:db441b81c65b0f104a7ce5d86c5432be099956b98b8a2c8be0b3fb3a7a0b1536
voluptuous==0.11.5 --hash=sha256:303542b3fc07fb52ec3d7a1c614b329cdbee13a9d681935353d8ea56a7bfa9f1
jsonschema==3.2.0 --hash=sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163
numpy==1.18.3 --hash=sha256:a551d8cc267c634774830086da42e4ba157fa41dd3b93982bc9501b284b0c689
scipy==1.4.1 --hash=sha256:386086e2972ed2db17cebf88610aab7d7f6e2c0ca30042dc9a89cf18dcc363fa
matplotlib==3.0.3 --hash=sha256:e8d1939262aa6b36d0c51f50a50a43a04b9618d20db31e6c0192b1463067aeef
opencv-python==4.2.0.34 --hash=sha256:dcb8da8c5ebaa6360c8555547a4c7beb6cd983dd95ba895bb78b86cc8cf3de2b
# Transitive dependencies
importlib_metadata==1.1.0 --hash=sha256:e6ac600a142cf2db707b1998382cc7fc3b02befb7273876e01b8ad10b9652742
more_itertools==8.0.0 --hash=sha256:a0ea684c39bc4315ba7aae406596ef191fd84f873d2d2751f84d64e81a7a2d45
pyrsistent==0.15.6 --hash=sha256:f3b280d030afb652f79d67c5586157c5c1355c9a58dfc7940566e28d28f3df1b
six==1.12.0 --hash=sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c
zipp==0.6.0 --hash=sha256:f06903e9f1f43b12d371004b4ac7b06ab39a44adc747266928ae6debfa7b3335
cycler==0.10.0 --hash=sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d
kiwisolver==1.1.0 --hash=sha256:400599c0fe58d21522cae0e8b22318e09d9729451b17ee61ba8e1e7c0346565c
pyparsing==2.4.7 --hash=sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b
python-dateutil==2.8.1 --hash=sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a
setuptools==46.1.3 --hash=sha256:4fe404eec2738c20ab5841fa2d791902d2a645f32318a7850ef26f8d7215a8ee

@@ -1,496 +0,0 @@
#!/usr/bin/env python3
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""Instrument visualmetrics.py to run in parallel."""
import argparse
import json
import logging
import os
import statistics
import subprocess
import sys
import tarfile
import time
from concurrent.futures import ProcessPoolExecutor
from functools import partial
from multiprocessing import cpu_count
from pathlib import Path
import attr
import structlog
from jsonschema import validate
from voluptuous import ALLOW_EXTRA, Required, Schema
#: The max run time for a command (5 minutes)
MAX_TIME = 300
#: The directory where artifacts from this job will be placed.
OUTPUT_DIR = Path("/", "builds", "worker", "artifacts")
#: A job to process through visualmetrics.py
@attr.s
class Job:
#: The name of the test.
test_name = attr.ib(type=str)
#: A unique number for the job.
count = attr.ib(type=int)
#: The tags for this job.
tags = attr.ib(type=str)
#: The extra options for this job.
extra_options = attr.ib(type=str)
#: If true, we allow 0's in the vismet results
accept_zero_vismet = attr.ib(type=bool)
#: json_path: The path to the ``browsertime.json`` file on disk.
json_path = attr.ib(type=Path)
#: video_path: The path of the video file on disk.
video_path = attr.ib(type=Path)
#: The schema for validating jobs.
JOB_SCHEMA = Schema(
{
Required("jobs"): [
{
Required("test_name"): str,
Required("browsertime_json_path"): str,
Required("tags"): [str],
Required("extra_options"): [str],
Required("accept_zero_vismet"): bool,
}
],
Required("application"): {Required("name"): str, "version": str},
Required("extra_options"): [str],
}
)
#: A partial schema for browsertime.json files.
BROWSERTIME_SCHEMA = Schema(
[{Required("files"): {Required("video"): [str]}}], extra=ALLOW_EXTRA
)
SHOULD_ALERT = {
"ContentfulSpeedIndex": True,
"FirstVisualChange": True,
"LastVisualChange": True,
"PerceptualSpeedIndex": True,
"SpeedIndex": True,
"videoRecordingStart": False,
}
with Path("/", "builds", "worker", "performance-artifact-schema.json").open() as f:
PERFHERDER_SCHEMA = json.loads(f.read())
def run_command(log, cmd, job_count):
"""Run a command using subprocess.check_output
Args:
log: The structlog logger instance.
cmd: the command to run as a list of strings.
Returns:
A tuple of the process' exit status and standard output.
"""
log.info("Running command", cmd=cmd)
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
lines = []
res = None
start = time.time()
while time.time() - start <= MAX_TIME:
time.sleep(0.1)
output = process.stdout.readline()
if output == b"" and process.poll() is not None:
break
if output:
res = output.strip()
lines.append(res.decode("utf-8", "ignore"))
else:
time.sleep(5)
if time.time() - start > MAX_TIME:
log.error(
"TEST-UNEXPECTED-FAIL | Timed out waiting for response from command",
cmd=cmd,
)
return 1, "Timed out"
rc = process.poll()
job_prefix = "[JOB-" + str(job_count) + "] "
for line in lines:
# Some output doesn't start with the levels because it comes
# from FFMPEG rather than the script itself
if line.startswith(("[INFO]", "[WARNING]", "[CRITICAL]", "[ERROR]")):
splitline = line.split(" - ")
level = splitline[0]
line = " - ".join(splitline[1:])
else:
level = "[INFO]"
newline = job_prefix + line
if level.strip() in ("[ERROR]", "[CRITICAL]"):
if rc == 0:
rc = 1
log.error("TEST-UNEXPECTED-FAIL | " + newline)
elif level == "[WARNING]":
log.warning(newline)
else:
log.info(newline)
return rc, res
def append_result(log, suites, test_name, name, result, tags, extra_options):
"""Appends a ``name`` metrics result in the ``test_name`` suite.
Args:
log: The structlog logger instance.
suites: A mapping containing the suites.
test_name: The name of the test.
name: The name of the metrics.
result: The value to append.
"""
if name.endswith("Progress"):
return
try:
result = int(result)
except ValueError:
log.error("Could not convert value", name=name)
log.error("%s" % result)
result = 0
orig_test_name = test_name
if test_name in suites and suites[test_name]["extraOptions"] != extra_options:
missing = set(extra_options) - set(suites[test_name]["extraOptions"])
test_name = test_name + "-".join(list(missing))
subtests = suites.setdefault(
test_name,
{
"name": orig_test_name,
"tags": extra_options + tags + ["visual"],
"subtests": {},
"extraOptions": extra_options,
},
)["subtests"]
if name not in subtests:
subtests[name] = {
"name": name,
"replicates": [result],
"lowerIsBetter": True,
"unit": "ms",
"shouldAlert": SHOULD_ALERT.get(name, False),
}
else:
subtests[name]["replicates"].append(result)
def compute_median(subtest):
"""Adds in the subtest the ``value`` field, which is the average of all
replicates.
Args:
subtest: The subtest containing all replicates.
Returns:
The subtest.
"""
if "replicates" not in subtest:
return subtest
subtest["value"] = statistics.median(subtest["replicates"])
return subtest
def get_suite(suite):
"""Returns the suite with computed medians in its subtests.
Args:
suite: The suite to convert.
Returns:
The suite.
"""
suite["subtests"] = [
compute_median(subtest) for subtest in suite["subtests"].values()
]
return suite
def read_json(json_path, schema):
"""Read the given json file and verify against the provided schema.
Args:
json_path: Path of json file to parse.
schema: A callable to validate the JSON's schema.
Returns:
The contents of the file at ``json_path`` interpreted as JSON.
"""
try:
with open(str(json_path), "r", encoding="utf-8", errors="ignore") as f:
data = json.load(f)
except Exception:
log.error("Could not read JSON file", path=json_path, exc_info=True)
raise
log.info("Loaded JSON from file", path=json_path)
try:
schema(data)
except Exception:
log.error("JSON failed to validate", exc_info=True)
raise
return data
def main(log, args):
"""Run visualmetrics.py in parallel.
Args:
log: The structlog logger instance.
args: The parsed arguments from the argument parser.
Returns:
The return code that the program will exit with.
"""
fetch_dir = os.getenv("MOZ_FETCHES_DIR")
if not fetch_dir:
log.error("Expected MOZ_FETCHES_DIR environment variable.")
return 1
fetch_dir = Path(fetch_dir)
visualmetrics_path = fetch_dir / "visualmetrics.py"
if not visualmetrics_path.exists():
log.error(
"Could not locate visualmetrics.py", expected_path=str(visualmetrics_path)
)
return 1
browsertime_results_path = fetch_dir / "browsertime-results.tgz"
try:
with tarfile.open(str(browsertime_results_path)) as tar:
tar.extractall(path=str(fetch_dir))
except Exception:
log.error(
"Could not read/extract browsertime results archive",
path=browsertime_results_path,
exc_info=True,
)
return 1
log.info("Extracted browsertime results", path=browsertime_results_path)
try:
jobs_json_path = fetch_dir / "browsertime-results" / "jobs.json"
jobs_json = read_json(jobs_json_path, JOB_SCHEMA)
except Exception:
log.error(
"Could not open the jobs.json file", path=jobs_json_path, exc_info=True
)
return 1
jobs = []
count = 0
for job in jobs_json["jobs"]:
browsertime_json_path = fetch_dir / job["browsertime_json_path"]
try:
browsertime_json = read_json(browsertime_json_path, BROWSERTIME_SCHEMA)
except Exception:
log.error(
"Could not open a browsertime.json file",
path=browsertime_json_path,
exc_info=True,
)
return 1
for site in browsertime_json:
for video in site["files"]["video"]:
count += 1
name = job["test_name"]
if "alias" in site["info"] and site["info"]["alias"].strip() != "":
name = "%s.%s" % (name, site["info"]["alias"])
jobs.append(
Job(
test_name=name,
tags=job["tags"],
extra_options=len(job["extra_options"]) > 0
and job["extra_options"]
or jobs_json["extra_options"],
accept_zero_vismet=job["accept_zero_vismet"],
json_path=browsertime_json_path,
video_path=browsertime_json_path.parent / video,
count=count,
)
)
failed_runs = 0
suites = {}
with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
for job, result in zip(
jobs,
executor.map(
partial(
run_visual_metrics,
visualmetrics_path=visualmetrics_path,
options=args.visual_metrics_options,
),
jobs,
),
):
returncode, res = result
if returncode != 0:
log.error(
"Failed to run visualmetrics.py",
video_path=job.video_path,
error=res,
)
failed_runs += 1
else:
for name, value in res.items():
append_result(
log,
suites,
job.test_name,
name,
value,
job.tags,
job.extra_options,
)
suites = [get_suite(suite) for suite in suites.values()]
perf_data = {
"framework": {"name": "browsertime"},
"application": jobs_json["application"],
"type": "pageload",
"suites": suites,
}
# TODO: Try to get the similarity for all possible tests, this means that we
# will also get a comparison of recorded vs. live sites to check the on-going
# quality of our recordings.
# Bug 1674927 - Similarity metric is disabled until we figure out
# why it had a huge increase in run time.
# Validates the perf data complies with perfherder schema.
# The perfherder schema uses jsonschema so we can't use voluptuous here.
validate(perf_data, PERFHERDER_SCHEMA)
raw_perf_data = json.dumps(perf_data)
with Path(OUTPUT_DIR, "perfherder-data.json").open("w") as f:
f.write(raw_perf_data)
# Prints the data in logs for Perfherder to pick it up.
log.info("PERFHERDER_DATA: %s" % raw_perf_data)
# Lists the number of processed jobs, failures, and successes.
with Path(OUTPUT_DIR, "summary.json").open("w") as f:
json.dump(
{
"total_jobs": len(jobs),
"successful_runs": len(jobs) - failed_runs,
"failed_runs": failed_runs,
},
f,
)
# If there's one failure along the way, we want to return > 0
# to trigger a red job in TC.
return failed_runs
def run_visual_metrics(job, visualmetrics_path, options):
"""Run visualmetrics.py on the input job.
Returns:
A returncode and a string containing the output of visualmetrics.py
"""
cmd = [
"/usr/bin/python",
str(visualmetrics_path),
"-vvv",
"--logformat",
"[%(levelname)s] - %(message)s",
"--video",
str(job.video_path),
]
cmd.extend(options)
rc, res = run_command(log, cmd, job.count)
if rc == 0:
# json.loads() requires a str object on Python 3.5 (bytes only work on 3.6+)
res = json.loads(res.decode("utf8"))
failed_tests = []
if not job.accept_zero_vismet:
# Ensure that none of these values are at 0, which
# is indicative of a failing test
monitored_tests = [
"contentfulspeedindex",
"lastvisualchange",
"perceptualspeedindex",
"speedindex",
]
for metric, val in res.items():
if metric.lower() in monitored_tests and val == 0:
failed_tests.append(metric)
if failed_tests:
log.error(
"TEST-UNEXPECTED-FAIL | Some visual metrics have an erroneous value of 0."
)
log.info("Tests which failed: %s" % str(failed_tests))
rc += 1
return rc, res
if __name__ == "__main__":
logging.basicConfig(format="%(levelname)s - %(message)s", level=logging.INFO)
structlog.configure(
processors=[
structlog.processors.format_exc_info,
structlog.dev.ConsoleRenderer(colors=False),
],
logger_factory=structlog.stdlib.LoggerFactory(),
cache_logger_on_first_use=True,
)
parser = argparse.ArgumentParser(
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument(
"visual_metrics_options",
type=str,
metavar="VISUAL-METRICS-OPTIONS",
help="Options to pass to visualmetrics.py",
nargs="*",
)
args = parser.parse_args()
log = structlog.get_logger()
try:
sys.exit(main(log, args))
except Exception as e:
log.error("Unhandled exception: %s" % e, exc_info=True)
sys.exit(1)
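To make JOB_SCHEMA above concrete, this is a sketch of the jobs.json the script expected to find inside browsertime-results.tgz; the test name, path, and options are invented for illustration:

# Hypothetical jobs.json contents matching JOB_SCHEMA; all values are
# illustrative only.
example_jobs_json = {
    "jobs": [
        {
            "test_name": "amazon",
            "browsertime_json_path": "browsertime-results/amazon/browsertime.json",
            "tags": ["mobile"],
            "extra_options": [],
            "accept_zero_vismet": False,
        }
    ],
    "application": {"name": "fenix", "version": "110.0"},
    "extra_options": ["webrender"],
}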

@@ -1,360 +0,0 @@
#!/usr/bin/env python3
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import cv2
import json
import numpy as np
import os
import pathlib
import shutil
import socket
import structlog
import tarfile
import tempfile
import urllib
from functools import wraps
from matplotlib import pyplot as plt
from scipy.stats import spearmanr
log = None
# We add the `and` conditions to it later
base_ad_query = {
"from": "task",
"limit": 1000,
"where": {
"and": []
},
"select": [
"action.start_time",
"run.name",
"task.artifacts",
"task.group.id",
"task.id"
],
}
def socket_timeout(value=120):
"""Decorator for socket timeouts."""
def _socket_timeout(func):
@wraps(func)
def __socket_timeout(*args, **kw):
old = socket.getdefaulttimeout()
socket.setdefaulttimeout(value)
try:
return func(*args, **kw)
finally:
socket.setdefaulttimeout(old)
return __socket_timeout
return _socket_timeout
def _open_data(file):
return cv2.VideoCapture(str(file))
@socket_timeout(120)
def _query_activedata(query_json):
"""Used to run queries on active data."""
active_data_url = "http://activedata.allizom.org/query"
req = urllib.request.Request(active_data_url)
req.add_header("Content-Type", "application/json")
jsondata = json.dumps(query_json)
jsondataasbytes = jsondata.encode("utf-8")
req.add_header("Content-Length", len(jsondataasbytes))
log.info("Querying Active-data...")
response = urllib.request.urlopen(req, jsondataasbytes)
log.info("Status: %s" % {str(response.getcode())})
data = json.loads(response.read().decode("utf8").replace("'", '"'))["data"]
return data
@socket_timeout(120)
def _download(url, loc):
"""Downloads from a url (with a timeout)."""
log.info("Downloading %s" % url)
try:
urllib.request.urlretrieve(url, loc)
except Exception as e:
log.info(str(e))
return False
return True
def _get_frames(video):
"""Gets all frames from a video into a list."""
allframes = []
while video.isOpened():
ret, frame = video.read()
if ret:
# Convert to gray to simplify the process
allframes.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
else:
video.release()
break
return allframes
def _get_browsertime_results(query):
"""Used to run an AD query and extract the browsertime results if they exist."""
failed = False
try:
data = _query_activedata(query)
except Exception as e:
log.info(str(e))
failed = True
if failed or not data:
log.info("Couldn't get activedata data")
return None
# Find the newest browsertime task
log.info("Found %s datums" % str(len(data["action.start_time"])))
maxind = np.argmax([float(t) for t in data["action.start_time"]])
artifacts = data["task.artifacts"][maxind]
btime_artifact = None
for art in artifacts:
if "browsertime-results" in art["name"]:
btime_artifact = art["url"]
break
if not btime_artifact:
log.info("Can't find an older site test")
return None
log.info("Comparing videos to TASK_GROUP=%s, TASK_ID=%s" % (
data["task.group.id"][maxind], data["task.id"][maxind]
))
# Download the browsertime videos and untar them
tmpdir = tempfile.mkdtemp()
loc = os.path.join(tmpdir, "tmpfile.tgz")
if not _download(btime_artifact, loc):
log.info(
"Failed to download browsertime-results artifact from %s" % btime_artifact
)
return None
tmploc = tempfile.mkdtemp()
try:
with tarfile.open(str(loc)) as tar:
tar.extractall(path=tmploc)
except Exception:
log.info(
"Could not read/extract old browsertime results archive",
path=loc,
exc_info=True,
)
return None
return tmploc
def _data_from_last_task(label):
"""Gets the data from the last PGO/OPT task with the same label.
We look for both OPT and PGO tasks. The difference
between them should be minimal. This method also provides
a way to compare recordings from this task to another
known task based on the TC_GROUP_ID environment variable.
"""
label_opt = label.replace("/pgo", "/opt")
label_pgo = label.replace("/opt", "/pgo")
base_ad_query["where"]["and"] = [
{"in": {"task.run.state": ["completed"]}},
{"or": [
{"eq": {"run.name": label_pgo}},
{"eq": {"run.name": label_opt}}
]}
]
task_group_id = os.getenv("TC_GROUP_ID", "")
if task_group_id:
base_ad_query["where"]["and"].append(
{"eq": {"task.group.id": task_group_id}}
)
else:
base_ad_query["where"]["and"].extend([
{"in": {"repo.branch.name": ["mozilla-central"]}},
{"gte": {"action.start_time": {"date": "today-week-week"}}},
])
return _get_browsertime_results(base_ad_query)
def _data_from_last_live_task(label):
"""Gets the data from the last live site PGO task."""
label_live = label.replace("/opt", "/pgo").replace("tp6m", "tp6m-live")
base_ad_query["where"]["and"] = [
{"in": {"repo.branch.name": ["mozilla-central"]}},
{"gte": {"action.start_time": {"date": "today-week-week"}}},
{"in": {"task.run.state": ["completed"]}},
{"eq": {"run.name": label_live}},
]
return _get_browsertime_results(base_ad_query)
def _get_similarity(old_videos_info, new_videos_info, output, prefix=""):
"""Calculates a similarity score for two groupings of videos.
The technique works as follows:
1. Get the last live site test.
2. For each of the 15x15 video pairings, build a cross-correlation matrix:
1. Get each of the videos and calculate their histograms
across the full videos.
2. Calculate the correlation coefficient between these two.
3. Average the cross-correlation matrix to obtain the score.
The 2D similarity score is the same, except that it builds a histogram
from the final frame instead of the full video.
Args:
old_videos_info: List of old videos (each a dict with "data" and "path").
new_videos_info: List of new videos from this task (same structure).
output: Location to output videos with low similarity scores.
prefix: String prefixed to the names of the output videos.
Returns:
Two similarity scores (3D, 2D) as a float.
"""
nhists = []
nhists2d = []
old_videos = [entry["data"] for entry in old_videos_info]
new_videos = [entry["data"] for entry in new_videos_info]
total_vids = min(len(old_videos), len(new_videos))
xcorr = np.zeros((total_vids, total_vids))
xcorr2d = np.zeros((total_vids, total_vids))
for i in range(total_vids):
datao = np.asarray(_get_frames(old_videos[i]))
histo, _, _ = plt.hist(datao.flatten(), bins=255)
histo2d, _, _ = plt.hist(datao[-1, :, :].flatten(), bins=255)
for j in range(total_vids):
if i == 0:
# Only calculate the histograms once; it takes time
datan = np.asarray(_get_frames(new_videos[j]))
histn, _, _ = plt.hist(datan.flatten(), bins=255)
histn2d, _, _ = plt.hist(datan[-1, :, :].flatten(), bins=255)
nhists.append(histn)
nhists2d.append(histn2d)
else:
histn = nhists[j]
histn2d = nhists2d[j]
rho, _ = spearmanr(histn, histo)
rho2d, _ = spearmanr(histn2d, histo2d)
xcorr[i, j] = rho
xcorr2d[i, j] = rho2d
similarity = np.mean(xcorr)
similarity2d = np.mean(xcorr2d)
log.info("Average 3D similarity: %s" % str(np.round(similarity, 5)))
log.info("Average 2D similarity: %s" % str(np.round(similarity2d, 5)))
if np.round(similarity, 1) <= 0.7 or np.round(similarity2d, 1) <= 0.7:
# For low correlations, output the worst video pairing
# so that we can visually see what the issue was
minind = np.unravel_index(np.argmin(xcorr, axis=None), xcorr.shape)
oldvid = old_videos_info[minind[0]]["path"]
shutil.copyfile(oldvid, str(pathlib.Path(output, "%sold_video.mp4" % prefix)))
newvid = new_videos_info[minind[1]]["path"]
shutil.copyfile(newvid, str(pathlib.Path(output, "%snew_video.mp4" % prefix)))
return np.round(similarity, 5), np.round(similarity2d, 5)
def calculate_similarity(jobs_json, fetch_dir, output):
"""Calculates the similarity score for this task.
Here we use activedata to find the last live site that ran and
to find the last task (with the same label) that ran. Those two
tasks are then compared to the current one and 4 metrics are produced.
For live sites, we only calculate 2 of these metrics, since the
playback similarity is not applicable to it.
Args:
jobs_json: The jobs JSON that holds extra information.
fetch_dir: The fetch directory that holds the new videos.
output: The output directory.
Returns:
A dictionary containing up to 4 different metrics (their values default
to None if a metric couldn't be calculated):
PlaybackSimilarity: Similarity of the full playback to a live site test.
PlaybackSimilarity2D: Same as above, but computed on the final frame only.
Similarity: Similarity of the test's video recording to its last run.
Similarity2D: Same as above, but computed on the final frame only.
"""
global log
log = structlog.get_logger()
label = os.getenv("TC_LABEL", "")
if not label:
log.info("TC_LABEL is undefined, cannot calculate similarity metrics")
return {}
# Get all the newest videos from this task
new_btime_videos = [
{"data": _open_data(str(f)), "path": str(f)}
for f in pathlib.Path(fetch_dir).rglob("*.mp4")
]
log.info("Found %s new videos" % str(len(new_btime_videos)))
# Get the similarity against the last task
old_btime_res = _data_from_last_task(label)
old_sim = old_sim2d = None
if old_btime_res:
old_btime_videos = [
{"data": _open_data(str(f)), "path": str(f)}
for f in pathlib.Path(old_btime_res).rglob("*.mp4")
]
log.info("Found %s old videos" % str(len(old_btime_videos)))
old_sim, old_sim2d = _get_similarity(
old_btime_videos, new_btime_videos, output
)
else:
log.info("Failed to find an older test task")
# Compare recordings to their live site variant if it exists
live_sim = live_sim2d = None
if "live" not in jobs_json["extra_options"]:
live_btime_res = _data_from_last_live_task(label)
if live_btime_res:
live_btime_videos = [
{"data": _open_data(str(f)), "path": str(f)}
for f in pathlib.Path(live_btime_res).rglob("*.mp4")
]
log.info("Found %s live videos" % str(len(live_btime_videos)))
live_sim, live_sim2d = _get_similarity(
live_btime_videos, new_btime_videos, output, prefix="live_"
)
else:
log.info("Failed to find a live site variant")
return {
"PlaybackSimilarity": live_sim,
"PlaybackSimilarity2D": live_sim2d,
"Similarity": old_sim,
"Similarity2D": old_sim2d,
}
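The similarity step stays disabled (Bug 1674927), but when enabled, the processing script above would import and invoke this module roughly as in the following sketch; the variable names mirror those used in run-visual-metrics.py and the exact wiring is an assumption:

# Hypothetical wiring from run-visual-metrics.py; jobs_json, fetch_dir and
# OUTPUT_DIR are the names used in that script, the call itself is a sketch.
from similarity import calculate_similarity

similarity_metrics = calculate_similarity(jobs_json, fetch_dir, str(OUTPUT_DIR))
for name, value in similarity_metrics.items():
    if value is None:
        # The metric could not be computed (e.g. no previous task was found).
        continue
    log.info("Similarity metric %s: %s" % (name, value))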

@@ -134,8 +134,8 @@ def build_browsertime_task(config, tasks):
run_visual_metrics = task.pop("run-visual-metrics", False)
if run_visual_metrics:
task["run"]["command"].append("--browsertime-video")
task["run"]["command"].append("--browsertime-visualmetrics")
task["run"]["command"].append("--browsertime-no-ffwindowrecorder")
task["attributes"]["run-visual-metrics"] = True
# Build taskcluster group and symbol
task["treeherder"]["symbol"] = "Btime(%s)" % symbol

@@ -1,91 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Generate labels for tasks without names, consistently.
Uses attributes from `primary-dependency`.
"""
from taskgraph.transforms.base import TransformSequence
transforms = TransformSequence()
SYMBOL = "{groupSymbol}({symbol}-vismet)"
# the test- prefix makes the task SETA-optimized.
LABEL = "test-vismet-{platform}-{label}"
@transforms.add
def make_label(config, jobs):
"""Generate a sane label for a new task constructed from a dependency
Using attributes from the dependent job and the current task kind"""
for job in jobs:
dep_job = job["primary-dependency"]
attr = dep_job.attributes.get
if attr("locale", job.get("locale")):
template = "{kind}-{locale}-{build_platform}/{build_type}"
elif attr("l10n_chunk"):
template = "{kind}-{build_platform}-{l10n_chunk}/{build_type}"
elif config.kind.startswith("release-eme-free") or config.kind.startswith(
"release-partner-repack"
):
suffix = job.get("extra", {}).get("repack_suffix", None) or job.get(
"extra", {}
).get("repack_id", None)
template = "{kind}-{build_platform}"
if suffix:
template += "-{}".format(suffix.replace("/", "-"))
else:
template = "{kind}-{build_platform}/{build_type}"
job["label"] = template.format(
kind=config.kind,
build_platform=attr("build_platform"),
build_type=attr("build_type"),
locale=attr("locale", job.get("locale", "")), # Locale can be absent
l10n_chunk=attr("l10n_chunk", ""), # Can be empty
)
yield job
@transforms.add
def run_visual_metrics(config, jobs):
for job in jobs:
dep_job = job.pop("primary-dependency", None)
if dep_job is not None:
platform = dep_job.task["extra"]["treeherder-platform"]
job["dependencies"] = {dep_job.label: dep_job.label}
# Add the artifact to be processed as a fetches artifact
job["fetches"][dep_job.label] = [
{"artifact": "browsertime-results.tgz", "extract": True}
]
# vismet runs on Linux but we want to have it displayed
# alongside the job it was triggered by, to make it easier
# for people to find.
job["label"] = LABEL.format(platform=platform, label=dep_job.label)
treeherder_info = dict(dep_job.task["extra"]["treeherder"])
job["treeherder"]["platform"] = platform
job["treeherder"]["symbol"] = SYMBOL.format(
groupSymbol=treeherder_info["groupSymbol"],
symbol=treeherder_info["symbol"],
)
# Store the platform name so we can use it to calculate
# the similarity metric against other tasks
job["worker"].setdefault("env", {})["TC_PLATFORM"] = platform
# run-on-projects needs to be set based on the dependent task
attributes = dict(dep_job.attributes)
job["run-on-projects"] = attributes["run_on_projects"]
# The run-on-tasks-for also needs to be set up here
job["run-on-tasks-for"] = attributes.get("run_on_tasks_for", [])
# We can't use the multi_dep transforms which remove this
# field, so we remove the dependent-tasks entry here
del job["dependent-tasks"]
yield job
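For context, the removed LABEL and SYMBOL templates produced names like the following; the platform, label, and symbol values here are invented for illustration:

# Illustrative rendering of the removed templates; the values are made up.
SYMBOL = "{groupSymbol}({symbol}-vismet)"
LABEL = "test-vismet-{platform}-{label}"

print(LABEL.format(platform="android-hw-p2-8-0-arm7-api-16",
                   label="browsertime-tp6m-fenix-amazon"))
# -> test-vismet-android-hw-p2-8-0-arm7-api-16-browsertime-tp6m-fenix-amazon
print(SYMBOL.format(groupSymbol="Btime", symbol="amazon"))
# -> Btime(amazon-vismet)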