diff --git a/taskcluster/docker/visual-metrics/run-visual-metrics.py b/taskcluster/docker/visual-metrics/run-visual-metrics.py
old mode 100644
new mode 100755
index 14b15221f..799207a8d
--- a/taskcluster/docker/visual-metrics/run-visual-metrics.py
+++ b/taskcluster/docker/visual-metrics/run-visual-metrics.py
@@ -8,11 +8,13 @@
 
 import argparse
 import json
+import logging
 import os
 import statistics
 import subprocess
 import sys
 import tarfile
+import time
 from concurrent.futures import ProcessPoolExecutor
 from functools import partial
 from multiprocessing import cpu_count
@@ -24,6 +26,10 @@ from jsonschema import validate
 from voluptuous import ALLOW_EXTRA, Required, Schema
 
+#: The max run time for a command (5 minutes)
+MAX_TIME = 300
+
+
 #: The directory where artifacts from this job will be placed.
 OUTPUT_DIR = Path("/", "builds", "worker", "artifacts")
 
@@ -34,6 +40,9 @@ class Job:
     #: The name of the test.
     test_name = attr.ib(type=str)
 
+    #: A unique number for the job.
+    count = attr.ib(type=int)
+
     #: The extra options for this job.
     extra_options = attr.ib(type=str)
 
@@ -64,11 +73,20 @@ BROWSERTIME_SCHEMA = Schema(
     [{Required("files"): {Required("video"): [str]}}], extra=ALLOW_EXTRA
 )
 
+SHOULD_ALERT = {
+    "ContentfulSpeedIndex": True,
+    "FirstVisualChange": True,
+    "LastVisualChange": True,
+    "PerceptualSpeedIndex": True,
+    "SpeedIndex": True,
+    "videoRecordingStart": False,
+}
+
 with Path("/", "builds", "worker", "performance-artifact-schema.json").open() as f:
     PERFHERDER_SCHEMA = json.loads(f.read())
 
 
-def run_command(log, cmd):
+def run_command(log, cmd, job_count):
     """Run a command using subprocess.check_output
 
     Args:
@@ -79,13 +97,52 @@ def run_command(log, cmd):
         A tuple of the process' exit status and standard output.
     """
     log.info("Running command", cmd=cmd)
-    try:
-        res = subprocess.check_output(cmd)
-        log.info("Command succeeded", result=res)
-        return 0, res
-    except subprocess.CalledProcessError as e:
-        log.info("Command failed", cmd=cmd, status=e.returncode, output=e.output)
-        return e.returncode, e.output
+    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+
+    lines = []
+    res = None
+    start = time.time()
+    while time.time() - start <= MAX_TIME:
+        time.sleep(0.1)
+        output = process.stdout.readline()
+        if output == b"" and process.poll() is not None:
+            break
+        if output:
+            res = output.strip()
+            lines.append(res.decode("utf-8", "ignore"))
+        else:
+            time.sleep(5)
+
+    if time.time() - start > MAX_TIME:
+        log.error(
+            "TEST-UNEXPECTED-FAIL | Timed out waiting for response from command",
+            cmd=cmd,
+        )
+        return 1, "Timed out"
+
+    rc = process.poll()
+    job_prefix = "[JOB-" + str(job_count) + "] "
+    for line in lines:
+        # Some output doesn't start with the levels because it comes
+        # from FFMPEG rather than the script itself
+        if line.startswith(("[INFO]", "[WARNING]", "[CRITICAL]", "[ERROR]")):
+            splitline = line.split(" - ")
+            level = splitline[0]
+            line = " - ".join(splitline[1:])
+        else:
+            level = "[INFO]"
+
+        newline = job_prefix + line
+        if level.strip() in ("[ERROR]", "[CRITICAL]"):
+            if rc == 0:
+                rc = 1
+            log.error("TEST-UNEXPECTED-FAIL | " + newline)
+        elif level == "[WARNING]":
+            log.warning(newline)
+        else:
+            log.info(newline)
+
+    return rc, res
 
 
 def append_result(log, suites, test_name, name, result, extra_options):
@@ -107,13 +164,14 @@ def append_result(log, suites, test_name, name, result, extra_options):
         log.error("%s" % result)
         result = 0
 
+    orig_test_name = test_name
     if test_name in suites and suites[test_name]["extraOptions"] != extra_options:
         missing = set(extra_options) - set(suites[test_name]["extraOptions"])
         test_name = test_name + "-".join(list(missing))
 
     subtests = suites.setdefault(
         test_name,
-        {"name": test_name, "subtests": {}, "extraOptions": extra_options}
+        {"name": orig_test_name, "subtests": {}, "extraOptions": extra_options},
     )["subtests"]
 
     if name not in subtests:
@@ -122,6 +180,7 @@ def append_result(log, suites, test_name, name, result, extra_options):
             "replicates": [result],
             "lowerIsBetter": True,
             "unit": "ms",
+            "shouldAlert": SHOULD_ALERT[name],
         }
     else:
         subtests[name]["replicates"].append(result)
@@ -169,7 +228,7 @@ def read_json(json_path, schema):
         The contents of the file at ``json_path`` interpreted as JSON.
     """
     try:
-        with open(str(json_path), "r", encoding="utf-8", errors="ignore") as f:
+        with open(str(json_path), "r", encoding="utf-8", errors="ignore") as f:
             data = json.load(f)
     except Exception:
         log.error("Could not read JSON file", path=json_path, exc_info=True)
@@ -219,7 +278,7 @@ def main(log, args):
         log.error(
             "Could not read/extract browsertime results archive",
             path=browsertime_results_path,
-            exc_info=True
+            exc_info=True,
         )
         return 1
     log.info("Extracted browsertime results", path=browsertime_results_path)
@@ -229,13 +288,12 @@ def main(log, args):
         jobs_json = read_json(jobs_json_path, JOB_SCHEMA)
     except Exception:
         log.error(
-            "Could not open the jobs.json file",
-            path=jobs_json_path,
-            exc_info=True
+            "Could not open the jobs.json file", path=jobs_json_path, exc_info=True
        )
        return 1
 
     jobs = []
+    count = 0
 
     for job in jobs_json["jobs"]:
         browsertime_json_path = fetch_dir / job["browsertime_json_path"]
@@ -246,19 +304,22 @@ def main(log, args):
             log.error(
                 "Could not open a browsertime.json file",
                 path=browsertime_json_path,
-                exc_info=True
+                exc_info=True,
             )
             return 1
 
         for site in browsertime_json:
             for video in site["files"]["video"]:
+                count += 1
                 jobs.append(
                     Job(
                         test_name=job["test_name"],
-                        extra_options=len(job["extra_options"]) > 0 and
-                        job["extra_options"] or jobs_json["extra_options"],
+                        extra_options=len(job["extra_options"]) > 0
+                        and job["extra_options"]
+                        or jobs_json["extra_options"],
                         json_path=browsertime_json_path,
                         video_path=browsertime_json_path.parent / video,
+                        count=count,
                     )
                 )
 
@@ -286,10 +347,10 @@ def main(log, args):
             )
             failed_runs += 1
         else:
-            # Python 3.5 requires a str object (not 3.6+)
-            res = json.loads(res.decode("utf8"))
             for name, value in res.items():
-                append_result(log, suites, job.test_name, name, value, job.extra_options)
+                append_result(
+                    log, suites, job.test_name, name, value, job.extra_options
+                )
 
     suites = [get_suite(suite) for suite in suites.values()]
 
@@ -300,23 +361,11 @@ def main(log, args):
         "suites": suites,
     }
 
-    # Try to get the similarity for all possible tests, this means that we
-    # will also get a comparison of recorded vs. live sites to check
-    # the on-going quality of our recordings.
-    try:
-        from similarity import calculate_similarity
-        for name, value in calculate_similarity(jobs_json, fetch_dir, OUTPUT_DIR).items():
-            if value is None:
-                continue
-            suites[0]["subtests"].append({
-                "name": name,
-                "value": value,
-                "replicates": [value],
-                "lowerIsBetter": False,
-                "unit": "a.u.",
-            })
-    except Exception:
-        log.info("Failed to calculate similarity score", exc_info=True)
+    # TODO: Try to get the similarity for all possible tests, this means that we
+    # will also get a comparison of recorded vs. live sites to check the on-going
+    # quality of our recordings.
+    # Bug 1674927 - Similarity metric is disabled until we figure out
+    # why it had a huge increase in run time.
 
     # Validates the perf data complies with perfherder schema.
     # The perfherder schema uses jsonschema so we can't use voluptuous here.
@@ -350,18 +399,52 @@ def run_visual_metrics(job, visualmetrics_path, options):
     Returns:
         A returncode and a string containing the output of visualmetrics.py
     """
-    cmd = ["/usr/bin/python", str(visualmetrics_path), "--video", str(job.video_path)]
+    cmd = [
+        "/usr/bin/python",
+        str(visualmetrics_path),
+        "-vvv",
+        "--logformat",
+        "[%(levelname)s] - %(message)s",
+        "--video",
+        str(job.video_path),
+    ]
     cmd.extend(options)
-    return run_command(log, cmd)
+    rc, res = run_command(log, cmd, job.count)
+
+    if rc == 0:
+        # Python 3.5 requires a str object (not 3.6+)
+        res = json.loads(res.decode("utf8"))
+
+        # Ensure that none of these values are at 0 which
+        # is indicative of a failing test
+        monitored_tests = [
+            "contentfulspeedindex",
+            "lastvisualchange",
+            "perceptualspeedindex",
+            "speedindex",
+        ]
+        failed_tests = []
+        for metric, val in res.items():
+            if metric.lower() in monitored_tests and val == 0:
+                failed_tests.append(metric)
+        if failed_tests:
+            log.error(
+                "TEST-UNEXPECTED-FAIL | Some visual metrics have an erroneous value of 0."
+            )
+            log.info("Tests which failed: %s" % str(failed_tests))
+            rc += 1
+
+    return rc, res
 
 
 if __name__ == "__main__":
+    logging.basicConfig(format="%(levelname)s - %(message)s", level=logging.INFO)
     structlog.configure(
         processors=[
-            structlog.processors.TimeStamper(fmt="iso"),
             structlog.processors.format_exc_info,
             structlog.dev.ConsoleRenderer(colors=False),
         ],
+        logger_factory=structlog.stdlib.LoggerFactory(),
         cache_logger_on_first_use=True,
     )
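
The per-line handling that run_command now applies to visualmetrics.py output can be exercised in isolation. The sketch below is illustrative only and is not part of the patch: the helper classify_line and the sample input lines are invented for demonstration. It mirrors the logic in the hunk above: each line is prefixed with a [JOB-N] marker, lines without a level prefix (for example raw ffmpeg output) are treated as [INFO], and any [ERROR] or [CRITICAL] line forces a non-zero return code.

# Standalone sketch of run_command's per-line handling.
# classify_line and the sample lines below are illustrative, not from the patch.
LEVELS = ("[INFO]", "[WARNING]", "[CRITICAL]", "[ERROR]")


def classify_line(line, job_count, rc):
    """Return (level, prefixed line, updated rc) for one line of output."""
    if line.startswith(LEVELS):
        splitline = line.split(" - ")
        level = splitline[0]
        line = " - ".join(splitline[1:])
    else:
        # ffmpeg output carries no level prefix; treat it as informational.
        level = "[INFO]"
    if level.strip() in ("[ERROR]", "[CRITICAL]") and rc == 0:
        rc = 1
    return level, "[JOB-%d] %s" % (job_count, line), rc


if __name__ == "__main__":
    rc = 0
    for raw in (
        "[INFO] - Calculated SpeedIndex",
        "frame=  120 fps=30 q=-1.0",
        "[ERROR] - ffmpeg exited with a non-zero status",
    ):
        level, text, rc = classify_line(raw, job_count=3, rc=rc)
        print(level, text)
    print("return code:", rc)  # 1, because an [ERROR] line was seen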