Run visual metrics processing within the test task.

Branch: pull/543/head
Authored by gmierz 2 years ago; committed by mergify[bot]
Parent: 5597ed6e98
Commit: 9ed5e55318

@@ -37,7 +37,7 @@ job-defaults:
subject: '[{product_name}] Raptor-Browsertime job "{task_name}" failed'
to-addresses: [perftest-alerts@mozilla.com]
default: {}
run-on-tasks-for: []
run-on-tasks-for: [github-pull-request]
treeherder:
kind: test
tier: 2
@@ -98,7 +98,7 @@ job-defaults:
- linux64-ffmpeg-4.1.4
- linux64-geckodriver
- linux64-minidump-stackwalk
- linux64-node
- linux64-node-16
jobs:
tp6m:

@@ -22,6 +22,3 @@ jobs:
ui-tests:
parent: base
symbol: I(ui-tests)
visual-metrics:
parent: base
symbol: I(visual-metrics)

@@ -49,10 +49,10 @@ linux64-node:
index-search:
- gecko.cache.level-3.toolchains.v3.linux64-node-12.latest
visual-metrics:
linux64-node-16:
attributes:
toolchain-artifact: public/visualmetrics.py
description: "Browsertime visual metrics analysis script"
toolchain-artifact: public/build/node.tar.zst
description: "Node.js toolchain"
run:
index-search:
- gecko.cache.level-3.content.v1.visual-metrics.latest
- gecko.cache.level-3.toolchains.v3.linux64-node-16.latest

@@ -1,51 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
---
loader: fenix_taskgraph.loader.multi_dep:loader
kind-dependencies:
- browsertime
- toolchain
primary-dependency:
- browsertime
group-by: attributes
only-for-attributes:
- run-visual-metrics
transforms:
- fenix_taskgraph.transforms.visual_metrics:transforms
- taskgraph.transforms.job:transforms
- taskgraph.transforms.task:transforms
job-template:
attributes:
nightly: true
description: "Run visual metrics calculations on Raptor"
run-on-projects: []
run-on-tasks-for: []
worker-type: b-android
treeherder:
tier: 2
kind: other
worker:
docker-image: {in-tree: visual-metrics}
max-run-time: 900
artifacts:
- type: file
name: public/perfherder-data.json
path: /builds/worker/artifacts/perfherder-data.json
- type: file
name: public/summary.json
path: /builds/worker/artifacts/summary.json
fetches:
toolchain:
- visual-metrics
run:
using: run-task
command: /builds/worker/bin/run-visual-metrics.py -- --orange --perceptual --contentful --force --renderignore 5 --json --viewport
checkout: false
run-as-root: true
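These options after the `--` were forwarded verbatim to visualmetrics.py for every captured video. Below is a sketch of the resulting per-video invocation, based on the command construction in run-visual-metrics.py further down; the video path is hypothetical:

# Sketch only: the final argument list built for one video, assuming the
# options shown above and a made-up video path.
cmd = [
    "/usr/bin/python",
    "visualmetrics.py",
    "-vvv",
    "--logformat", "[%(levelname)s] - %(message)s",
    "--video", "browsertime-results/amazon/1/video.mp4",
    "--orange", "--perceptual", "--contentful", "--force",
    "--renderignore", "5", "--json", "--viewport",
]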

@@ -1,30 +0,0 @@
FROM $DOCKER_IMAGE_PARENT
MAINTAINER Gregory Mierzwinski <gmierzwinski@mozilla.com>
# run-task expects to run as root
USER root
RUN apt-get update -qq && \
apt-get install -y \
ffmpeg \
imagemagick \
pyssim \
python \
python-pil
WORKDIR /builds/worker
USER worker:worker
COPY requirements.txt /builds/worker/requirements.txt
RUN pip3 install --require-hashes -r /builds/worker/requirements.txt && \
rm /builds/worker/requirements.txt
COPY similarity.py /builds/worker/bin/similarity.py
COPY run-visual-metrics.py /builds/worker/bin/run-visual-metrics.py
COPY performance-artifact-schema.json /builds/worker/performance-artifact-schema.json
USER root
RUN chmod +x /builds/worker/bin/run-visual-metrics.py
VOLUME /builds/worker/artifacts/

@@ -1,230 +0,0 @@
{
"definitions": {
"application_schema": {
"properties": {
"name": {
"title": "Application under performance test",
"enum": [
"firefox",
"chrome",
"chrome-m",
"chromium",
"fennec",
"geckoview",
"refbrow",
"fenix"
],
"maxLength": 10,
"type": "string"
},
"version": {
"title": "Application's version",
"maxLength": 40,
"type": "string"
}
},
"required": ["name"],
"type": "object"
},
"framework_schema": {
"properties": {
"name": {
"title": "Framework name",
"type": "string"
}
},
"type": "object"
},
"subtest_schema": {
"properties": {
"name": {
"title": "Subtest name",
"type": "string"
},
"publicName": {
"title": "Public subtest name",
"description": "Allows renaming test's name, without breaking existing performance data series",
"maxLength": 30,
"type": "string"
},
"value": {
"description": "Summary value for subtest",
"title": "Subtest value",
"type": "number",
"minimum": -1000000000000.0,
"maximum": 1000000000000.0
},
"unit": {
"title": "Measurement unit",
"type": "string",
"minLength": 1,
"maxLength": 20
},
"lowerIsBetter": {
"description": "Whether lower values are better for subtest",
"title": "Lower is better",
"type": "boolean"
},
"shouldAlert": {
"description": "Whether we should alert",
"title": "Should alert",
"type": "boolean"
},
"alertThreshold": {
"description": "% change threshold before alerting",
"title": "Alert threshold",
"type": "number",
"minimum": 0.0,
"maximum": 1000.0
},
"minBackWindow": {
"description": "Minimum back window to use for alerting",
"title": "Minimum back window",
"type": "number",
"minimum": 1,
"maximum": 255
},
"maxBackWindow": {
"description": "Maximum back window to use for alerting",
"title": "Maximum back window",
"type": "number",
"minimum": 1,
"maximum": 255
},
"foreWindow": {
"description": "Fore window to use for alerting",
"title": "Fore window",
"type": "number",
"minimum": 1,
"maximum": 255
}
},
"required": [
"name",
"value"
],
"type": "object"
},
"suite_schema": {
"properties": {
"name": {
"title": "Suite name",
"type": "string"
},
"publicName": {
"title": "Public suite name",
"description": "Allows renaming suite's name, without breaking existing performance data series",
"maxLength": 30,
"type": "string"
},
"tags": {
"type": "array",
"title": "Free form tags, which ease the grouping & searching of performance tests",
"description": "Similar to extraOptions, except it does not break existing performance data series",
"items": {
"type": "string",
"pattern": "^[a-zA-Z0-9-]{1,24}$"
},
"uniqueItems": true,
"maxItems": 14
},
"extraOptions": {
"type": "array",
"title": "Extra options used in running suite",
"items": {
"type": "string",
"maxLength": 100
},
"uniqueItems": true,
"maxItems": 8
},
"subtests": {
"items": {
"$ref": "#/definitions/subtest_schema"
},
"title": "Subtests",
"type": "array"
},
"value": {
"title": "Suite value",
"type": "number",
"minimum": -1000000000000.0,
"maximum": 1000000000000.0
},
"unit": {
"title": "Measurement unit",
"type": "string",
"minLength": 1,
"maxLength": 20
},
"lowerIsBetter": {
"description": "Whether lower values are better for suite",
"title": "Lower is better",
"type": "boolean"
},
"shouldAlert": {
"description": "Whether we should alert on this suite (overrides default behaviour)",
"title": "Should alert",
"type": "boolean"
},
"alertThreshold": {
"description": "% change threshold before alerting",
"title": "Alert threshold",
"type": "number",
"minimum": 0.0,
"maximum": 1000.0
},
"minBackWindow": {
"description": "Minimum back window to use for alerting",
"title": "Minimum back window",
"type": "integer",
"minimum": 1,
"maximum": 255
},
"maxBackWindow": {
"description": "Maximum back window to use for alerting",
"title": "Maximum back window",
"type": "integer",
"minimum": 1,
"maximum": 255
},
"foreWindow": {
"description": "Fore window to use for alerting",
"title": "Fore window",
"type": "integer",
"minimum": 1,
"maximum": 255
}
},
"required": [
"name",
"subtests"
],
"type": "object"
}
},
"description": "Structure for submitting performance data as part of a job",
"id": "https://treeherder.mozilla.org/schemas/v1/performance-artifact.json#",
"properties": {
"application":{
"$ref": "#/definitions/application_schema"
},
"framework": {
"$ref": "#/definitions/framework_schema"
},
"suites": {
"description": "List of suite-level data submitted as part of this structure",
"items": {
"$ref": "#/definitions/suite_schema"
},
"title": "Performance suites",
"type": "array"
}
},
"required": [
"framework",
"suites"
],
"title": "Perfherder Schema",
"type": "object"
}
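For orientation, a minimal payload that satisfies this schema could look like the sketch below; the application name, version, suite, and metric values are illustrative rather than taken from a real run:

import json

from jsonschema import validate

# Hypothetical, minimal Perfherder payload. Only "framework" and "suites" are
# required at the top level, and each suite needs "name" and "subtests".
perf_data = {
    "framework": {"name": "browsertime"},
    "application": {"name": "fenix", "version": "110.0"},
    "suites": [
        {
            "name": "amazon",
            "subtests": [
                {
                    "name": "SpeedIndex",
                    "value": 810,
                    "unit": "ms",
                    "lowerIsBetter": True,
                    "shouldAlert": True,
                }
            ],
        }
    ],
}

with open("performance-artifact-schema.json") as f:
    validate(perf_data, json.load(f))  # raises ValidationError if the payload is invalid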

@@ -1,23 +0,0 @@
# Dependency hashes must be for python3.6
# Direct dependencies
attrs==19.1.0 --hash=sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79
structlog==19.1.0 --hash=sha256:db441b81c65b0f104a7ce5d86c5432be099956b98b8a2c8be0b3fb3a7a0b1536
voluptuous==0.11.5 --hash=sha256:303542b3fc07fb52ec3d7a1c614b329cdbee13a9d681935353d8ea56a7bfa9f1
jsonschema==3.2.0 --hash=sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163
numpy==1.18.3 --hash=sha256:a551d8cc267c634774830086da42e4ba157fa41dd3b93982bc9501b284b0c689
scipy==1.4.1 --hash=sha256:386086e2972ed2db17cebf88610aab7d7f6e2c0ca30042dc9a89cf18dcc363fa
matplotlib==3.0.3 --hash=sha256:e8d1939262aa6b36d0c51f50a50a43a04b9618d20db31e6c0192b1463067aeef
opencv-python==4.2.0.34 --hash=sha256:dcb8da8c5ebaa6360c8555547a4c7beb6cd983dd95ba895bb78b86cc8cf3de2b
# Transitive dependencies
importlib_metadata==1.1.0 --hash=sha256:e6ac600a142cf2db707b1998382cc7fc3b02befb7273876e01b8ad10b9652742
more_itertools==8.0.0 --hash=sha256:a0ea684c39bc4315ba7aae406596ef191fd84f873d2d2751f84d64e81a7a2d45
pyrsistent==0.15.6 --hash=sha256:f3b280d030afb652f79d67c5586157c5c1355c9a58dfc7940566e28d28f3df1b
six==1.12.0 --hash=sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c
zipp==0.6.0 --hash=sha256:f06903e9f1f43b12d371004b4ac7b06ab39a44adc747266928ae6debfa7b3335
cycler==0.10.0 --hash=sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d
kiwisolver==1.1.0 --hash=sha256:400599c0fe58d21522cae0e8b22318e09d9729451b17ee61ba8e1e7c0346565c
pyparsing==2.4.7 --hash=sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b
python-dateutil==2.8.1 --hash=sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a
setuptools==46.1.3 --hash=sha256:4fe404eec2738c20ab5841fa2d791902d2a645f32318a7850ef26f8d7215a8ee

@@ -1,496 +0,0 @@
#!/usr/bin/env python3
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""Instrument visualmetrics.py to run in parallel."""
import argparse
import json
import logging
import os
import statistics
import subprocess
import sys
import tarfile
import time
from concurrent.futures import ProcessPoolExecutor
from functools import partial
from multiprocessing import cpu_count
from pathlib import Path
import attr
import structlog
from jsonschema import validate
from voluptuous import ALLOW_EXTRA, Required, Schema
#: The max run time for a command (5 minutes)
MAX_TIME = 300
#: The directory where artifacts from this job will be placed.
OUTPUT_DIR = Path("/", "builds", "worker", "artifacts")
#: A job to process through visualmetrics.py
@attr.s
class Job:
#: The name of the test.
test_name = attr.ib(type=str)
#: A unique number for the job.
count = attr.ib(type=int)
#: The tags for this job.
tags = attr.ib(type=str)
#: The extra options for this job.
extra_options = attr.ib(type=str)
#: If true, we allow 0's in the vismet results
accept_zero_vismet = attr.ib(type=bool)
#: json_path: The path to the ``browsertime.json`` file on disk.
json_path = attr.ib(type=Path)
#: video_path: The path of the video file on disk.
video_path = attr.ib(type=Path)
#: The schema for validating jobs.
JOB_SCHEMA = Schema(
{
Required("jobs"): [
{
Required("test_name"): str,
Required("browsertime_json_path"): str,
Required("tags"): [str],
Required("extra_options"): [str],
Required("accept_zero_vismet"): bool,
}
],
Required("application"): {Required("name"): str, "version": str},
Required("extra_options"): [str],
}
)
#: A partial schema for browsertime.json files.
BROWSERTIME_SCHEMA = Schema(
[{Required("files"): {Required("video"): [str]}}], extra=ALLOW_EXTRA
)
SHOULD_ALERT = {
"ContentfulSpeedIndex": True,
"FirstVisualChange": True,
"LastVisualChange": True,
"PerceptualSpeedIndex": True,
"SpeedIndex": True,
"videoRecordingStart": False,
}
with Path("/", "builds", "worker", "performance-artifact-schema.json").open() as f:
PERFHERDER_SCHEMA = json.loads(f.read())
def run_command(log, cmd, job_count):
"""Run a command using subprocess.check_output
Args:
log: The structlog logger instance.
cmd: the command to run as a list of strings.
Returns:
A tuple of the process' exit status and standard output.
"""
log.info("Running command", cmd=cmd)
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
lines = []
res = None
start = time.time()
while time.time() - start <= MAX_TIME:
time.sleep(0.1)
output = process.stdout.readline()
if output == b"" and process.poll() is not None:
break
if output:
res = output.strip()
lines.append(res.decode("utf-8", "ignore"))
else:
time.sleep(5)
if time.time() - start > MAX_TIME:
log.error(
"TEST-UNEXPECTED-FAIL | Timed out waiting for response from command",
cmd=cmd,
)
return 1, "Timed out"
rc = process.poll()
job_prefix = "[JOB-" + str(job_count) + "] "
for line in lines:
# Some output doesn't start with the levels because it comes
# from FFMPEG rather than the script itself
if line.startswith(("[INFO]", "[WARNING]", "[CRITICAL]", "[ERROR]")):
splitline = line.split(" - ")
level = splitline[0]
line = " - ".join(splitline[1:])
else:
level = "[INFO]"
newline = job_prefix + line
if level.strip() in ("[ERROR]", "[CRITICAL]"):
if rc == 0:
rc = 1
log.error("TEST-UNEXPECTED-FAIL | " + newline)
elif level == "[WARNING]":
log.warning(newline)
else:
log.info(newline)
return rc, res
def append_result(log, suites, test_name, name, result, tags, extra_options):
"""Appends a ``name`` metrics result in the ``test_name`` suite.
Args:
log: The structlog logger instance.
suites: A mapping containing the suites.
test_name: The name of the test.
name: The name of the metrics.
result: The value to append.
"""
if name.endswith("Progress"):
return
try:
result = int(result)
except ValueError:
log.error("Could not convert value", name=name)
log.error("%s" % result)
result = 0
orig_test_name = test_name
if test_name in suites and suites[test_name]["extraOptions"] != extra_options:
missing = set(extra_options) - set(suites[test_name]["extraOptions"])
test_name = test_name + "-".join(list(missing))
subtests = suites.setdefault(
test_name,
{
"name": orig_test_name,
"tags": extra_options + tags + ["visual"],
"subtests": {},
"extraOptions": extra_options,
},
)["subtests"]
if name not in subtests:
subtests[name] = {
"name": name,
"replicates": [result],
"lowerIsBetter": True,
"unit": "ms",
"shouldAlert": SHOULD_ALERT.get(name, False),
}
else:
subtests[name]["replicates"].append(result)
def compute_median(subtest):
"""Adds in the subtest the ``value`` field, which is the average of all
replicates.
Args:
subtest: The subtest containing all replicates.
Returns:
The subtest.
"""
if "replicates" not in subtest:
return subtest
subtest["value"] = statistics.median(subtest["replicates"])
return subtest
def get_suite(suite):
"""Returns the suite with computed medians in its subtests.
Args:
suite: The suite to convert.
Returns:
The suite.
"""
suite["subtests"] = [
compute_median(subtest) for subtest in suite["subtests"].values()
]
return suite
def read_json(json_path, schema):
"""Read the given json file and verify against the provided schema.
Args:
json_path: Path of json file to parse.
schema: A callable to validate the JSON's schema.
Returns:
The contents of the file at ``json_path`` interpreted as JSON.
"""
try:
with open(str(json_path), "r", encoding="utf-8", errors="ignore") as f:
data = json.load(f)
except Exception:
log.error("Could not read JSON file", path=json_path, exc_info=True)
raise
log.info("Loaded JSON from file", path=json_path)
try:
schema(data)
except Exception:
log.error("JSON failed to validate", exc_info=True)
raise
return data
def main(log, args):
"""Run visualmetrics.py in parallel.
Args:
log: The structlog logger instance.
args: The parsed arguments from the argument parser.
Returns:
The return code that the program will exit with.
"""
fetch_dir = os.getenv("MOZ_FETCHES_DIR")
if not fetch_dir:
log.error("Expected MOZ_FETCHES_DIR environment variable.")
return 1
fetch_dir = Path(fetch_dir)
visualmetrics_path = fetch_dir / "visualmetrics.py"
if not visualmetrics_path.exists():
log.error(
"Could not locate visualmetrics.py", expected_path=str(visualmetrics_path)
)
return 1
browsertime_results_path = fetch_dir / "browsertime-results.tgz"
try:
with tarfile.open(str(browsertime_results_path)) as tar:
tar.extractall(path=str(fetch_dir))
except Exception:
log.error(
"Could not read/extract browsertime results archive",
path=browsertime_results_path,
exc_info=True,
)
return 1
log.info("Extracted browsertime results", path=browsertime_results_path)
try:
jobs_json_path = fetch_dir / "browsertime-results" / "jobs.json"
jobs_json = read_json(jobs_json_path, JOB_SCHEMA)
except Exception:
log.error(
"Could not open the jobs.json file", path=jobs_json_path, exc_info=True
)
return 1
jobs = []
count = 0
for job in jobs_json["jobs"]:
browsertime_json_path = fetch_dir / job["browsertime_json_path"]
try:
browsertime_json = read_json(browsertime_json_path, BROWSERTIME_SCHEMA)
except Exception:
log.error(
"Could not open a browsertime.json file",
path=browsertime_json_path,
exc_info=True,
)
return 1
for site in browsertime_json:
for video in site["files"]["video"]:
count += 1
name = job["test_name"]
if "alias" in site["info"] and site["info"]["alias"].strip() != "":
name = "%s.%s" % (name, site["info"]["alias"])
jobs.append(
Job(
test_name=name,
tags=job["tags"],
extra_options=len(job["extra_options"]) > 0
and job["extra_options"]
or jobs_json["extra_options"],
accept_zero_vismet=job["accept_zero_vismet"],
json_path=browsertime_json_path,
video_path=browsertime_json_path.parent / video,
count=count,
)
)
failed_runs = 0
suites = {}
with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
for job, result in zip(
jobs,
executor.map(
partial(
run_visual_metrics,
visualmetrics_path=visualmetrics_path,
options=args.visual_metrics_options,
),
jobs,
),
):
returncode, res = result
if returncode != 0:
log.error(
"Failed to run visualmetrics.py",
video_path=job.video_path,
error=res,
)
failed_runs += 1
else:
for name, value in res.items():
append_result(
log,
suites,
job.test_name,
name,
value,
job.tags,
job.extra_options,
)
suites = [get_suite(suite) for suite in suites.values()]
perf_data = {
"framework": {"name": "browsertime"},
"application": jobs_json["application"],
"type": "pageload",
"suites": suites,
}
# TODO: Try to get the similarity for all possible tests, this means that we
# will also get a comparison of recorded vs. live sites to check the on-going
# quality of our recordings.
# Bug 1674927 - Similarity metric is disabled until we figure out
# why it had a huge increase in run time.
# Validates the perf data complies with perfherder schema.
# The perfherder schema uses jsonschema so we can't use voluptuous here.
validate(perf_data, PERFHERDER_SCHEMA)
raw_perf_data = json.dumps(perf_data)
with Path(OUTPUT_DIR, "perfherder-data.json").open("w") as f:
f.write(raw_perf_data)
# Prints the data in logs for Perfherder to pick it up.
log.info("PERFHERDER_DATA: %s" % raw_perf_data)
# Lists the number of processed jobs, failures, and successes.
with Path(OUTPUT_DIR, "summary.json").open("w") as f:
json.dump(
{
"total_jobs": len(jobs),
"successful_runs": len(jobs) - failed_runs,
"failed_runs": failed_runs,
},
f,
)
# If there's one failure along the way, we want to return > 0
# to trigger a red job in TC.
return failed_runs
def run_visual_metrics(job, visualmetrics_path, options):
"""Run visualmetrics.py on the input job.
Returns:
A returncode and a string containing the output of visualmetrics.py
"""
cmd = [
"/usr/bin/python",
str(visualmetrics_path),
"-vvv",
"--logformat",
"[%(levelname)s] - %(message)s",
"--video",
str(job.video_path),
]
cmd.extend(options)
rc, res = run_command(log, cmd, job.count)
if rc == 0:
# json.loads() requires a str object on Python 3.5 (bytes only work on 3.6+)
res = json.loads(res.decode("utf8"))
failed_tests = []
if not job.accept_zero_vismet:
# Ensure that none of these values are at 0, which
# is indicative of a failing test
monitored_tests = [
"contentfulspeedindex",
"lastvisualchange",
"perceptualspeedindex",
"speedindex",
]
for metric, val in res.items():
if metric.lower() in monitored_tests and val == 0:
failed_tests.append(metric)
if failed_tests:
log.error(
"TEST-UNEXPECTED-FAIL | Some visual metrics have an erroneous value of 0."
)
log.info("Tests which failed: %s" % str(failed_tests))
rc += 1
return rc, res
if __name__ == "__main__":
logging.basicConfig(format="%(levelname)s - %(message)s", level=logging.INFO)
structlog.configure(
processors=[
structlog.processors.format_exc_info,
structlog.dev.ConsoleRenderer(colors=False),
],
logger_factory=structlog.stdlib.LoggerFactory(),
cache_logger_on_first_use=True,
)
parser = argparse.ArgumentParser(
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument(
"visual_metrics_options",
type=str,
metavar="VISUAL-METRICS-OPTIONS",
help="Options to pass to visualmetrics.py",
nargs="*",
)
args = parser.parse_args()
log = structlog.get_logger()
try:
sys.exit(main(log, args))
except Exception as e:
log.error("Unhandled exception: %s" % e, exc_info=True)
sys.exit(1)
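To make JOB_SCHEMA above concrete, this is a sketch of the jobs.json the script expected to find inside browsertime-results.tgz; the test name, path, and options are invented for illustration:

# Hypothetical jobs.json contents matching JOB_SCHEMA; all values are
# illustrative only.
example_jobs_json = {
    "jobs": [
        {
            "test_name": "amazon",
            "browsertime_json_path": "browsertime-results/amazon/browsertime.json",
            "tags": ["mobile"],
            "extra_options": [],
            "accept_zero_vismet": False,
        }
    ],
    "application": {"name": "fenix", "version": "110.0"},
    "extra_options": ["webrender"],
}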

@@ -1,360 +0,0 @@
#!/usr/bin/env python3
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import cv2
import json
import numpy as np
import os
import pathlib
import shutil
import socket
import structlog
import tarfile
import tempfile
import urllib
from functools import wraps
from matplotlib import pyplot as plt
from scipy.stats import spearmanr
log = None
# We add the `and` conditions to it later
base_ad_query = {
"from": "task",
"limit": 1000,
"where": {
"and": []
},
"select": [
"action.start_time",
"run.name",
"task.artifacts",
"task.group.id",
"task.id"
],
}
def socket_timeout(value=120):
"""Decorator for socket timeouts."""
def _socket_timeout(func):
@wraps(func)
def __socket_timeout(*args, **kw):
old = socket.getdefaulttimeout()
socket.setdefaulttimeout(value)
try:
return func(*args, **kw)
finally:
socket.setdefaulttimeout(old)
return __socket_timeout
return _socket_timeout
def _open_data(file):
return cv2.VideoCapture(str(file))
@socket_timeout(120)
def _query_activedata(query_json):
"""Used to run queries on active data."""
active_data_url = "http://activedata.allizom.org/query"
req = urllib.request.Request(active_data_url)
req.add_header("Content-Type", "application/json")
jsondata = json.dumps(query_json)
jsondataasbytes = jsondata.encode("utf-8")
req.add_header("Content-Length", len(jsondataasbytes))
log.info("Querying Active-data...")
response = urllib.request.urlopen(req, jsondataasbytes)
log.info("Status: %s" % {str(response.getcode())})
data = json.loads(response.read().decode("utf8").replace("'", '"'))["data"]
return data
@socket_timeout(120)
def _download(url, loc):
"""Downloads from a url (with a timeout)."""
log.info("Downloading %s" % url)
try:
urllib.request.urlretrieve(url, loc)
except Exception as e:
log.info(str(e))
return False
return True
def _get_frames(video):
"""Gets all frames from a video into a list."""
allframes = []
while video.isOpened():
ret, frame = video.read()
if ret:
# Convert to gray to simplify the process
allframes.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
else:
video.release()
break
return allframes
def _get_browsertime_results(query):
"""Used to run an AD query and extract the browsertime results if they exist."""
failed = False
try:
data = _query_activedata(query)
except Exception as e:
log.info(str(e))
failed = True
if failed or not data:
log.info("Couldn't get activedata data")
return None
# Find the newest browsertime task
log.info("Found %s datums" % str(len(data["action.start_time"])))
maxind = np.argmax([float(t) for t in data["action.start_time"]])
artifacts = data["task.artifacts"][maxind]
btime_artifact = None
for art in artifacts:
if "browsertime-results" in art["name"]:
btime_artifact = art["url"]
break
if not btime_artifact:
log.info("Can't find an older site test")
return None
log.info("Comparing videos to TASK_GROUP=%s, TASK_ID=%s" % (
data["task.group.id"][maxind], data["task.id"][maxind]
))
# Download the browsertime videos and untar them
tmpdir = tempfile.mkdtemp()
loc = os.path.join(tmpdir, "tmpfile.tgz")
if not _download(btime_artifact, loc):
log.info(
"Failed to download browsertime-results artifact from %s" % btime_artifact
)
return None
tmploc = tempfile.mkdtemp()
try:
with tarfile.open(str(loc)) as tar:
tar.extractall(path=tmploc)
except Exception:
log.info(
"Could not read/extract old browsertime results archive",
path=loc,
exc_info=True,
)
return None
return tmploc
def _data_from_last_task(label):
"""Gets the data from the last PGO/OPT task with the same label.
We look for both OPT and PGO tasks. The difference
between them should be minimal. This method also provides
a way to compare recordings from this task to another
known task based on the TC_GROUP_ID environment variable.
"""
label_opt = label.replace("/pgo", "/opt")
label_pgo = label.replace("/opt", "/pgo")
base_ad_query["where"]["and"] = [
{"in": {"task.run.state": ["completed"]}},
{"or": [
{"eq": {"run.name": label_pgo}},
{"eq": {"run.name": label_opt}}
]}
]
task_group_id = os.getenv("TC_GROUP_ID", "")
if task_group_id:
base_ad_query["where"]["and"].append(
{"eq": {"task.group.id": task_group_id}}
)
else:
base_ad_query["where"]["and"].extend([
{"in": {"repo.branch.name": ["mozilla-central"]}},
{"gte": {"action.start_time": {"date": "today-week-week"}}},
])
return _get_browsertime_results(base_ad_query)
def _data_from_last_live_task(label):
"""Gets the data from the last live site PGO task."""
label_live = label.replace("/opt", "/pgo").replace("tp6m", "tp6m-live")
base_ad_query["where"]["and"] = [
{"in": {"repo.branch.name": ["mozilla-central"]}},
{"gte": {"action.start_time": {"date": "today-week-week"}}},
{"in": {"task.run.state": ["completed"]}},
{"eq": {"run.name": label_live}},
]
return _get_browsertime_results(base_ad_query)
def _get_similarity(old_videos_info, new_videos_info, output, prefix=""):
"""Calculates a similarity score for two groupings of videos.
The technique works as follows:
1. Get the last live site test.
2. For each of the 15x15 video pairings, build a cross-correlation matrix:
1. Get each of the videos and calculate their histograms
across the full videos.
2. Calculate the correlation coefficient between these two.
3. Average the cross-correlation matrix to obtain the score.
The 2D similarity score is the same, except that it builds a histogram
from the final frame instead of the full video.
Args:
old_videos_info: List of old videos (each a dict with "data" and "path").
new_videos_info: List of new videos from this task (same structure).
output: Location to output videos with low similarity scores.
prefix: String prefixed to the names of the output videos.
Returns:
Two similarity scores (3D, 2D) as a float.
"""
nhists = []
nhists2d = []
old_videos = [entry["data"] for entry in old_videos_info]
new_videos = [entry["data"] for entry in new_videos_info]
total_vids = min(len(old_videos), len(new_videos))
xcorr = np.zeros((total_vids, total_vids))
xcorr2d = np.zeros((total_vids, total_vids))
for i in range(total_vids):
datao = np.asarray(_get_frames(old_videos[i]))
histo, _, _ = plt.hist(datao.flatten(), bins=255)
histo2d, _, _ = plt.hist(datao[-1, :, :].flatten(), bins=255)
for j in range(total_vids):
if i == 0:
# Only calculate the histograms once; it takes time
datan = np.asarray(_get_frames(new_videos[j]))
histn, _, _ = plt.hist(datan.flatten(), bins=255)
histn2d, _, _ = plt.hist(datan[-1, :, :].flatten(), bins=255)
nhists.append(histn)
nhists2d.append(histn2d)
else:
histn = nhists[j]
histn2d = nhists2d[j]
rho, _ = spearmanr(histn, histo)
rho2d, _ = spearmanr(histn2d, histo2d)
xcorr[i, j] = rho
xcorr2d[i, j] = rho2d
similarity = np.mean(xcorr)
similarity2d = np.mean(xcorr2d)
log.info("Average 3D similarity: %s" % str(np.round(similarity, 5)))
log.info("Average 2D similarity: %s" % str(np.round(similarity2d, 5)))
if np.round(similarity, 1) <= 0.7 or np.round(similarity2d, 1) <= 0.7:
# For low correlations, output the worst video pairing
# so that we can visually see what the issue was
minind = np.unravel_index(np.argmin(xcorr, axis=None), xcorr.shape)
oldvid = old_videos_info[minind[0]]["path"]
shutil.copyfile(oldvid, str(pathlib.Path(output, "%sold_video.mp4" % prefix)))
newvid = new_videos_info[minind[1]]["path"]
shutil.copyfile(newvid, str(pathlib.Path(output, "%snew_video.mp4" % prefix)))
return np.round(similarity, 5), np.round(similarity2d, 5)
def calculate_similarity(jobs_json, fetch_dir, output):
"""Calculates the similarity score for this task.
Here we use activedata to find the last live site that ran and
to find the last task (with the same label) that ran. Those two
tasks are then compared to the current one and 4 metrics are produced.
For live sites, we only calculate 2 of these metrics, since the
playback similarity is not applicable to it.
Args:
jobs_json: The jobs JSON that holds extra information.
fetch_dir: The fetch directory that holds the new videos.
output: The output directory.
Returns:
A dictionary containing up to 4 different metrics (their values default
to None if a metric couldn't be calculated):
PlaybackSimilarity: Similarity of the full playback to a live site test.
PlaybackSimilarity2D: Same as above, but computed on the final frame only.
Similarity: Similarity of the test's video recording to its last run.
Similarity2D: Same as above, but computed on the final frame only.
"""
global log
log = structlog.get_logger()
label = os.getenv("TC_LABEL", "")
if not label:
log.info("TC_LABEL is undefined, cannot calculate similarity metrics")
return {}
# Get all the newest videos from this task
new_btime_videos = [
{"data": _open_data(str(f)), "path": str(f)}
for f in pathlib.Path(fetch_dir).rglob("*.mp4")
]
log.info("Found %s new videos" % str(len(new_btime_videos)))
# Get the similarity against the last task
old_btime_res = _data_from_last_task(label)
old_sim = old_sim2d = None
if old_btime_res:
old_btime_videos = [
{"data": _open_data(str(f)), "path": str(f)}
for f in pathlib.Path(old_btime_res).rglob("*.mp4")
]
log.info("Found %s old videos" % str(len(old_btime_videos)))
old_sim, old_sim2d = _get_similarity(
old_btime_videos, new_btime_videos, output
)
else:
log.info("Failed to find an older test task")
# Compare recordings to their live site variant if it exists
live_sim = live_sim2d = None
if "live" not in jobs_json["extra_options"]:
live_btime_res = _data_from_last_live_task(label)
if live_btime_res:
live_btime_videos = [
{"data": _open_data(str(f)), "path": str(f)}
for f in pathlib.Path(live_btime_res).rglob("*.mp4")
]
log.info("Found %s live videos" % str(len(live_btime_videos)))
live_sim, live_sim2d = _get_similarity(
live_btime_videos, new_btime_videos, output, prefix="live_"
)
else:
log.info("Failed to find a live site variant")
return {
"PlaybackSimilarity": live_sim,
"PlaybackSimilarity2D": live_sim2d,
"Similarity": old_sim,
"Similarity2D": old_sim2d,
}
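The similarity step stays disabled (Bug 1674927), but when enabled, the processing script above would import and invoke this module roughly as in the following sketch; the variable names mirror those used in run-visual-metrics.py and the exact wiring is an assumption:

# Hypothetical wiring from run-visual-metrics.py; jobs_json, fetch_dir and
# OUTPUT_DIR are the names used in that script, the call itself is a sketch.
from similarity import calculate_similarity

similarity_metrics = calculate_similarity(jobs_json, fetch_dir, str(OUTPUT_DIR))
for name, value in similarity_metrics.items():
    if value is None:
        # The metric could not be computed (e.g. no previous task was found).
        continue
    log.info("Similarity metric %s: %s" % (name, value))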

@@ -134,8 +134,8 @@ def build_browsertime_task(config, tasks):
run_visual_metrics = task.pop("run-visual-metrics", False)
if run_visual_metrics:
task["run"]["command"].append("--browsertime-video")
task["run"]["command"].append("--browsertime-visualmetrics")
task["run"]["command"].append("--browsertime-no-ffwindowrecorder")
task["attributes"]["run-visual-metrics"] = True
# Build taskcluster group and symbol
task["treeherder"]["symbol"] = "Btime(%s)" % symbol

@@ -1,91 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Generate labels for tasks without names, consistently.
Uses attributes from `primary-dependency`.
"""
from taskgraph.transforms.base import TransformSequence
transforms = TransformSequence()
SYMBOL = "{groupSymbol}({symbol}-vismet)"
# the test- prefix makes the task SETA-optimized.
LABEL = "test-vismet-{platform}-{label}"
@transforms.add
def make_label(config, jobs):
"""Generate a sane label for a new task constructed from a dependency
Using attributes from the dependent job and the current task kind"""
for job in jobs:
dep_job = job["primary-dependency"]
attr = dep_job.attributes.get
if attr("locale", job.get("locale")):
template = "{kind}-{locale}-{build_platform}/{build_type}"
elif attr("l10n_chunk"):
template = "{kind}-{build_platform}-{l10n_chunk}/{build_type}"
elif config.kind.startswith("release-eme-free") or config.kind.startswith(
"release-partner-repack"
):
suffix = job.get("extra", {}).get("repack_suffix", None) or job.get(
"extra", {}
).get("repack_id", None)
template = "{kind}-{build_platform}"
if suffix:
template += "-{}".format(suffix.replace("/", "-"))
else:
template = "{kind}-{build_platform}/{build_type}"
job["label"] = template.format(
kind=config.kind,
build_platform=attr("build_platform"),
build_type=attr("build_type"),
locale=attr("locale", job.get("locale", "")), # Locale can be absent
l10n_chunk=attr("l10n_chunk", ""), # Can be empty
)
yield job
@transforms.add
def run_visual_metrics(config, jobs):
for job in jobs:
dep_job = job.pop("primary-dependency", None)
if dep_job is not None:
platform = dep_job.task["extra"]["treeherder-platform"]
job["dependencies"] = {dep_job.label: dep_job.label}
# Add the artifact to be processed as a fetches artifact
job["fetches"][dep_job.label] = [
{"artifact": "browsertime-results.tgz", "extract": True}
]
# vismet runs on Linux but we want to have it displayed
# alongside the job it was triggered by, to make it easier
# for people to find.
job["label"] = LABEL.format(platform=platform, label=dep_job.label)
treeherder_info = dict(dep_job.task["extra"]["treeherder"])
job["treeherder"]["platform"] = platform
job["treeherder"]["symbol"] = SYMBOL.format(
groupSymbol=treeherder_info["groupSymbol"],
symbol=treeherder_info["symbol"],
)
# Store the platform name so we can use it to calculate
# the similarity metric against other tasks
job["worker"].setdefault("env", {})["TC_PLATFORM"] = platform
# run-on-projects needs to be set based on the dependent task
attributes = dict(dep_job.attributes)
job["run-on-projects"] = attributes["run_on_projects"]
# The run-on-tasks-for also needs to be set up here
job["run-on-tasks-for"] = attributes.get("run_on_tasks_for", [])
# We can't use the multi_dep transforms which remove this
# field, so we remove the dependent-tasks entry here
del job["dependent-tasks"]
yield job
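For context, the removed LABEL and SYMBOL templates produced names like the following; the platform, label, and symbol values here are invented for illustration:

# Illustrative rendering of the removed templates; the values are made up.
SYMBOL = "{groupSymbol}({symbol}-vismet)"
LABEL = "test-vismet-{platform}-{label}"

print(LABEL.format(platform="android-hw-p2-8-0-arm7-api-16",
                   label="browsertime-tp6m-fenix-amazon"))
# -> test-vismet-android-hw-p2-8-0-arm7-api-16-browsertime-tp6m-fenix-amazon
print(SYMBOL.format(groupSymbol="Btime", symbol="amazon"))
# -> Btime(amazon-vismet)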