Enriching the dataset with VirusTotal
Posted Dec 21, 2018 |
Section 3.4
#!/usr/local/bin/python3
import json
import hashlib
from virus_total_apis import PrivateApi as VirusTotalPrivateApi
from virus_total_apis import PublicApi as VirusTotalPublicApi
import psycopg2
import pprint
import time
# Database connection shared by the work-queue query below and by the per-row
# updates performed later in the script.
conn = psycopg2.connect("REDACTED")

# VirusTotal client; the private API class is the one instantiated here.
API_KEY = 'REDACTED'
vt = VirusTotalPrivateApi(API_KEY)

# Load the batch of hashes to process: up to 1000 rows of malware_metadata
# whose processing_result is still NULL. The result set is fully fetched into
# `rows`, so the cursor is released right away instead of staying open for
# the remainder of the run.
cur = conn.cursor()
try:
    cur.execute("select sha from malware_metadata where processing_result is null limit 1000")
    rows = cur.fetchall()
finally:
    cur.close()
# For every pending hash, ask VirusTotal for its file report and record the
# outcome in the database: report fields are stored when a report exists,
# otherwise the hash is marked as NOTFOUND.
for row in rows:
    file_sha = row[0]
    response = vt.get_file_report(file_sha, allinfo=1)

    # When the request itself fails (rate limit, rejected key, network error)
    # the client returns a dict without a "results" entry. Skip the row and
    # leave processing_result untouched so a later run retries it, instead of
    # dying with a KeyError in the middle of the batch.
    results = response.get("results")
    if results is None:
        print("skipping " + str(file_sha) + ": " + str(response.get("error", response)))
        continue

    cursor = conn.cursor()
    try:
        if results["response_code"] == 1:
            # compose the vendor names collection: the labels of every engine
            # that flagged the file, separated by "; "
            composite_names = "; ".join(
                scan["result"]
                for scan in results["scans"].values()
                if scan["detected"]
            )
            # update the relevant row of the aggregate table
            cursor.execute(
                "update malware_metadata set (scan_date, first_seen, times_submitted, "
                "type, positives, unique_sources, ssdeep, vt_resource, last_seen, "
                "other_family, processing_result) = "
                "(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) where sha = %s",
                (
                    results["scan_date"],
                    results["first_seen"],
                    results["times_submitted"],
                    results["type"],
                    results["positives"],
                    results["unique_sources"],
                    results["ssdeep"],
                    results["resource"],
                    results["last_seen"],
                    composite_names,
                    "PROCESSED",
                    file_sha,
                ),
            )
            outcome = "found"
        else:
            # NOTE(review): this branch writes to `observed`, while the work
            # queue is selected from `malware_metadata`. If that is not
            # intentional, NOTFOUND hashes will be re-selected on every run --
            # confirm against the schema.
            cursor.execute(
                "update observed set processing_result=%s where sha = %s",
                ("NOTFOUND", file_sha),
            )
            outcome = "not found"
        conn.commit()
        print(str(cursor.rowcount) + " row updated (" + outcome + "): " + cursor.statusmessage)
    finally:
        # The original not-found path had `cursor.close` without the call
        # parentheses, so the cursor was never released; close it on every
        # path, including when an exception is raised.
        cursor.close()