summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Aki <please@ignore.pl> 2022-05-24 23:47:05 +0200
committer Aki <please@ignore.pl> 2022-05-24 23:47:05 +0200
commit 1ad8d912fac79bac70b0e55d75f63ca951921bb3 (patch)
tree b6aa734bcff1a59508db6d9ae5c617526f58ae8d
parent 3e2c2543cdf98dea0b866d60b2dae2ddfe961863 (diff)
download field-1ad8d912fac79bac70b0e55d75f63ca951921bb3.zip
field-1ad8d912fac79bac70b0e55d75f63ca951921bb3.tar.gz
field-1ad8d912fac79bac70b0e55d75f63ca951921bb3.tar.bz2
Removed scrapping script in favour of the separate repository
-rw-r--r-- .gitignore 6
-rw-r--r-- scrap.py 94
2 files changed, 2 insertions, 98 deletions
diff --git a/.gitignore b/.gitignore
index 9ea276c..0c0820e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,2 @@
-__pycache__
-*.json
-derelict
-.derelict
+derelict/
+.derelict/
diff --git a/scrap.py b/scrap.py
deleted file mode 100644
index 07e4036..0000000
--- a/scrap.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import argparse
-import json
-import re
-import sys
-import time
-from html.parser import HTMLParser
-
-import requests
-
-
-OWNER_HREF = re.compile(r"/(?:corporation|alliance)/(\d+)/?")
-SYSTEM_AND_DATE = re.compile(r"/(\d+)/(\d+)/$")
-
-
-def get_hash(kill):
- response = requests.get("https://zkillboard.com/api/killID/{}/".format(kill))
- response.raise_for_status()
- data = response.json()
- if len(data) > 1:
- raise ValueError()
- return data[0]["zkb"]["hash"]
-
-
-class RelatedParser(HTMLParser):
- def __init__(self):
- super().__init__()
- self._team = 0
- self._kills = set()
- self._current = None
-
- def handle_starttag(self, tag, attrs):
- attrs = dict(attrs)
-
- if tag == "table" and attrs.get("id", "").lower() == "killlist":
- self._team += 1
-
- if tag == "tr" and attrs.get("class", "").lower() == "killlistrow" and self._team > 0:
- self._flush()
- killid = attrs.get("killid", "")
- self._current = (self._team, killid, None)
-
- if tag == "a" and self._team > 0 and self._current:
- match = OWNER_HREF.match(attrs.get("href", ""))
- if match:
- self._current = (*self._current[:2], match.group(1))
- self._flush()
-
- def _flush(self):
- if self._current and all(self._current):
- self._kills.add(self._current)
- self._current = None
-
- @property
- def kills(self):
- self._flush()
- return self._kills
-
-
-def main():
- parser = argparse.ArgumentParser()
- parser.add_argument("url")
- parser.add_argument("-o", "--output")
- args = parser.parse_args()
-
- response = requests.get(args.url)
- response.raise_for_status()
- page = response.text
-
- related = RelatedParser()
- related.feed(page)
-
- killmails = []
- teams = (set(), set())
- for team, kill, owner in related.kills:
- time.sleep(1.05) # Zkillboard is very sensitive.
- killmails.append({"id": int(kill), "hash": get_hash(kill)})
- destination = teams[team - 1]
- destination.add(int(owner))
-
- if args.output:
- filename = args.output
- else:
- match = SYSTEM_AND_DATE.search(args.url)
- if match:
- filename = "{}_{}.json".format(*match.groups())
- else:
- filename = "scrapped.json"
-
- with open(filename, "w") as file:
- file.write(json.dumps({"killmails": killmails, "teams": tuple(map(list, teams))}))
-
-
-if __name__ == "__main__":
- main()