thesis.yaxs.net

Enriching the dataset with VirusTotal

Posted Dec 21, 2018 | Section 3.4

#!/usr/local/bin/python3
import json
import hashlib
from virus_total_apis import PrivateApi as VirusTotalPrivateApi
from virus_total_apis import PublicApi as VirusTotalPublicApi
import psycopg2  
import pprint
import time

# Database connection and VirusTotal client shared by the rest of the script.
# NOTE(review): the DSN and API key are inlined as literals; consider reading
# them from the environment or a config file kept out of version control.
conn = psycopg2.connect("REDACTED")
API_KEY = 'REDACTED'
vt = VirusTotalPrivateApi(API_KEY)

# Fetch the next batch (at most 1000) of hashes whose processing_result is
# still NULL. The cursor is used as a context manager so it is closed as soon
# as the rows have been read, even if the query raises.
with conn.cursor() as cur:
    cur.execute("select sha from malware_metadata where processing_result is null limit 1000")
    rows = cur.fetchall()

# Enrich every pending sample with its VirusTotal file report. Each row is
# committed individually, so an interruption loses at most the sample that is
# currently in flight.
for row in rows:
    file_sha = row[0]
    response = vt.get_file_report(file_sha, allinfo=1)

    # When the HTTP call itself fails (rate limit, bad key, network error) the
    # client returns a dict with an "error" entry and no "results" key.
    # Report it and leave the row untouched so a later run retries it, rather
    # than aborting the whole batch with a KeyError.
    results = response.get("results")
    if results is None:
        print(file_sha + " skipped (API error): " + str(response.get("error")))
        continue

    # The cursor is a context manager: it is closed on exit from the block,
    # including when execute() or commit() raises.
    with conn.cursor() as cursor:
        if results["response_code"] == 1:
            # compose the vendor names collection: the label reported by each
            # engine that flagged the sample, separated by "; "
            composite_names = "; ".join(
                scan["result"]
                for scan in results["scans"].values()
                if scan["detected"]
            )

            # update the relevant row of the aggregate table
            cursor.execute(
                "update malware_metadata set (scan_date, first_seen, times_submitted, "
                "type, positives, unique_sources, ssdeep, vt_resource, last_seen, "
                "other_family, processing_result) = "
                "(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) where sha = %s",
                (
                    results["scan_date"],
                    results["first_seen"],
                    results["times_submitted"],
                    results["type"],
                    results["positives"],
                    results["unique_sources"],
                    results["ssdeep"],
                    results["resource"],
                    results["last_seen"],
                    composite_names,
                    "PROCESSED",
                    file_sha,
                ),
            )
            outcome = "found"
        else:
            # NOTE(review): this marks the hash in table "observed", while the
            # batch is selected from "malware_metadata". If "observed" is not
            # the same relation, NOTFOUND hashes keep a NULL processing_result
            # in malware_metadata and are re-selected on every run. Confirm
            # the intended table before changing it.
            cursor.execute(
                "update observed set processing_result=%s where sha = %s",
                ("NOTFOUND", file_sha),
            )
            outcome = "not found"

        conn.commit()
        print(str(cursor.rowcount) + " row updated (" + outcome + "): " + cursor.statusmessage)