summaryrefslogtreecommitdiffhomepage
path: root/scrap.py
blob: c985e8cabd53567de47a75b57de695061a7df528 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import argparse
import json
import re
import sys
import time
from html.parser import HTMLParser

import requests


# Matches a link path that starts with "/corporation/<digits>" or "/alliance/<digits>" and
# captures the digits; RelatedParser stores that capture as the victim ID of a kill.
OWNER_HREF = re.compile(r"/(?:corporation|alliance)/(\d+)/?")
# Matches two numeric path segments at the very end of a URL (an optional trailing slash is allowed);
# output_name() joins both captures into the default file name. Judging by the name these are
# presumably the system ID and the date of the battle report -- confirm against real URLs.
SYSTEM_AND_DATE = re.compile(r"/(\d+)/(\d+)/?$")


def get_hash(kill, timeout=30):
	"""
	Looks up and returns hash of the *kill* using Zkillboard's API.

	*timeout* is the number of seconds handed to ``requests.get``; without it a stalled
	connection would block the whole run indefinitely.

	Raises ``requests.HTTPError`` (via ``raise_for_status``) when the API answers with an error
	status, and ``ValueError`` when the response does not describe exactly one killmail.
	"""
	response = requests.get("https://zkillboard.com/api/killID/{}/".format(kill), timeout=timeout)
	response.raise_for_status()
	data = response.json()
	# An empty answer used to surface as a bare IndexError from data[0]; report it the same way as
	# the "too many results" case so that the failing kill is named in the message.
	if len(data) != 1:
		raise ValueError("expected exactly one killmail for kill {}, got {}".format(kill, len(data)))
	return data[0]["zkb"]["hash"]


class RelatedParser(HTMLParser):
	"""
	Reads kill IDs and teams from Zkillboard's related kills page.

	Every ``<table>`` whose ``id`` is "killlist" (compared case-insensitively) starts the next team,
	numbered from 1. Inside a team, every ``<tr>`` whose ``class`` is "killlistrow" opens a new kill
	taken from its ``killid`` attribute, and the first following ``<a>`` whose ``href`` matches
	``OWNER_HREF`` supplies the ID recorded as the victim of that kill.
	"""
	def __init__(self):
		super().__init__()
		# Number of kill list tables seen so far; 0 means no team has started yet.
		self._team = 0
		# Completed (kill ID, team number, victim ID) tuples; IDs are kept as strings.
		self._kills = set()
		# Kill that is currently being assembled, or None when there is none.
		self._current = None

	def handle_starttag(self, tag, attrs):
		# HTMLParser reports attributes written without a value (e.g. ``<a href>``) as None.
		# Normalise those to empty strings so that the string operations below cannot fail.
		attrs = {name: value or "" for name, value in attrs}

		if tag == "table" and attrs.get("id", "").lower() == "killlist":
			self._team += 1

		if tag == "tr" and attrs.get("class", "").lower() == "killlistrow" and self._team > 0:
			self._flush()
			# HTMLParser lower-cases attribute names, hence "killid" rather than "killID".
			killid = attrs.get("killid", "")
			self._current = (killid, self._team, None)

		if tag == "a" and self._team > 0 and self._current:
			match = OWNER_HREF.match(attrs.get("href", ""))
			if match:
				# Keep kill ID and team, fill in the victim and store the kill right away so that
				# later owner links in the same row are ignored.
				self._current = (*self._current[:2], match.group(1))
				self._flush()

	def _flush(self):
		# Only complete entries (non-empty kill ID, team and victim) are stored; an incomplete
		# entry is left in place until it is completed or replaced by the next row.
		if self._current and all(self._current):
			self._kills.add(self._current)
			self._current = None

	@property
	def kills(self):
		"""
		Returns all kills found by the parser along with their team and the ID of the victim.
		"""
		self._flush()
		return self._kills


def get_related_kills(url, timeout=30):
	"""
	Builds basic snapshot containing all killmails from battle report at *url*.

	*timeout* is the number of seconds handed to ``requests.get``; without it a stalled
	connection would block the whole run indefinitely.

	Returns a dictionary with two keys: "killmails", a list of ``{"id": <int>}`` entries sorted by
	ID, and "teams", a list of two sorted lists with the victim IDs found in the first and in the
	second kill list of the page.

	Raises ``requests.HTTPError`` (via ``raise_for_status``) on an HTTP error status. Only two teams
	are supported: a page with a third kill list containing kills ends in ``IndexError``.
	"""
	response = requests.get(url, timeout=timeout)
	response.raise_for_status()
	related = RelatedParser()
	related.feed(response.text)
	killmails = []
	teams = (set(), set())
	for kill, team, victim in related.kills:
		killmails.append({"id": int(kill)})
		# Teams are numbered from 1 by the parser.
		teams[team - 1].add(int(victim))
	# The parser hands out a set, so iteration order differs between runs; sort everything to make
	# the snapshot reproducible.
	killmails.sort(key=lambda killmail: killmail["id"])
	return {"killmails": killmails, "teams": [sorted(team) for team in teams]}


def expand_hashes(snapshot):
	"""
	Adds a "hash" entry to every killmail of *snapshot*, looked up on Zkillboard by the killmail's "id".

	The *snapshot* is modified IN PLACE and the very same object is returned.
	"""
	entries = snapshot["killmails"]
	for entry in entries:
		kill_id = entry["id"]
		entry["hash"] = get_hash(kill_id)
		# Pause after every lookup; Zkillboard is very sensitive.
		time.sleep(1.05)
	return snapshot


def output_name(args):
	"""
	Picks the name of the output file for the parsed CLI *args*.

	An explicitly given ``args.output`` always wins. Otherwise the name is built from the two
	numeric segments at the end of ``args.url``, and "a.json" is used when the URL has none.
	"""
	explicit = args.output
	if explicit:
		return explicit
	found = SYSTEM_AND_DATE.search(args.url)
	if found is None:
		return "a.json"
	first, second = found.groups()
	return "{}_{}.json".format(first, second)


def main():
	"""
	Command line entry point.

	Downloads the battle report given as the positional URL argument, looks up the hash of every
	killmail and writes the resulting snapshot as JSON (indented when ``--pretty`` is given) to
	the file chosen by ``output_name``.
	"""
	cli = argparse.ArgumentParser()
	cli.add_argument("url")
	cli.add_argument("-o", "--output")
	cli.add_argument("--pretty", action='store_true')
	args = cli.parse_args()

	battle = get_related_kills(args.url)
	expand_hashes(battle)

	# indent=None is the default of json.dumps and yields the compact single-line form.
	indent = 4 if args.pretty else None
	with open(output_name(args), "w") as out:
		out.write(json.dumps(battle, indent=indent) + "\n")


# Run the scraper only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
	main()