summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r-- scrap.py 60
1 files changed, 60 insertions, 0 deletions
diff --git a/scrap.py b/scrap.py
new file mode 100644
index 0000000..0a2bbae
--- /dev/null
+++ b/scrap.py
@@ -0,0 +1,60 @@
+import argparse
+import json
+import re
+import sys
+import time
+
+import requests
+
# Matches links of the form href="/kill/<digits>/" in a fetched page and
# captures the numeric kill ID.
KILL_HREF = re.compile(r"href=\"/kill/(\d+)/\"")
# Captures the last two numeric path segments of a URL (the name suggests a
# system ID and a date -- confirm against real URLs). main() joins the two
# groups into the default output filename. The trailing slash is optional so
# that a URL pasted without it still yields a descriptive filename.
SYSTEM_AND_DATE = re.compile(r"/(\d+)/(\d+)/?$")
+
+
def unique_kills_in(page):
    """Yield every kill ID linked from *page*, each one only once.

    IDs are the strings captured by KILL_HREF and are produced in the order
    in which they first appear in the text.
    """
    seen = set()
    for kill in (found.group(1) for found in KILL_HREF.finditer(page)):
        if kill in seen:
            # Already reported this ID; skip the repeated link.
            continue
        seen.add(kill)
        yield kill
+
+
def get_hash(kill, timeout=30):
    """Fetch the hash that the zKillboard API reports for one kill.

    Args:
        kill: Kill ID; it is interpolated into the API URL.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get`` so a stalled connection cannot hang
            the script forever.

    Returns:
        The value stored under ``["zkb"]["hash"]`` in the single record of
        the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within *timeout*.
        ValueError: If the response does not contain exactly one record.
    """
    response = requests.get(
        "https://zkillboard.com/api/killID/{}/".format(kill),
        timeout=timeout,
    )
    response.raise_for_status()
    data = response.json()
    # Exactly one record is expected. Zero records would otherwise surface as
    # an unexplained IndexError, and several would make the result ambiguous.
    if len(data) != 1:
        raise ValueError(
            "expected exactly one record for kill {}, got {}".format(
                kill, len(data)
            )
        )
    return data[0]["zkb"]["hash"]
+
+
def main():
    """Scrape kill IDs from a page and save them with their hashes as JSON.

    Command line: a positional ``url`` and an optional ``-o/--output`` path.

    The page at ``url`` is downloaded, and every unique kill ID linked from
    it is looked up with get_hash(). The result is written as a JSON list of
    ``{"id": ..., "hash": ...}`` objects.

    The output file is ``--output`` when given. Otherwise it is
    ``<first>_<second>.json`` built from the two groups of SYSTEM_AND_DATE
    when the URL matches, and ``scrapped.json`` when it does not.

    Raises:
        requests.HTTPError: If the page request returns an error status.
        requests.Timeout: If the page request gets no answer in time.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("url")
    parser.add_argument("-o", "--output")
    args = parser.parse_args()

    # A timeout keeps a stalled connection from blocking the script forever.
    response = requests.get(args.url, timeout=30)
    response.raise_for_status()
    page = response.text

    output = []
    for kill in unique_kills_in(page):
        time.sleep(1.05)  # Zkillboard is very sensitive.
        output.append({"id": kill, "hash": get_hash(kill)})

    if args.output:
        filename = args.output
    else:
        match = SYSTEM_AND_DATE.search(args.url)
        if match:
            filename = "{}_{}.json".format(*match.groups())
        else:
            filename = "scrapped.json"

    # Explicit encoding makes the file independent of the platform default.
    with open(filename, "w", encoding="utf-8") as file:
        json.dump(output, file)
+
+
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()