summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author W. Kosior <koszko@koszko.org> 2024-12-17 17:34:11 +0100
committer W. Kosior <koszko@koszko.org> 2024-12-17 17:34:11 +0100
commit b94e315bc34bd5ce93d69d68b2837dc968a4a39a (patch)
tree f56d6b7e73772c4b24aaf4c9db489fcbf27713fe
parent b6418bfbfad8fb3f0d9f206163496a10b36a1877 (diff)
download AGH-threat-intel-course-b94e315bc34bd5ce93d69d68b2837dc968a4a39a.tar.gz
AGH-threat-intel-course-b94e315bc34bd5ce93d69d68b2837dc968a4a39a.zip
Attach scraped data to that from profiles.yaml and output together.
-rw-r--r-- Makefile 9
-rw-r--r-- scrape_groups_info.py 28
2 files changed, 19 insertions, 18 deletions
diff --git a/Makefile b/Makefile
index b6042c1..7154069 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@
PYTHON=python3
PANDOC=pandoc
-all: tables.pdf scraped_info.yaml
+all: tables.pdf profiles_with_scraped_info.yaml
.PHONY: all
.SUFFIXES: .pdf .md
@@ -25,18 +25,19 @@ all: tables.pdf scraped_info.yaml
tables.md: threats_by_sector_table.py profiles.yaml
$(PYTHON) $^ > $@
-scraped_info.yaml: scrape_groups_info.py profiles.yaml
+profiles_with_scraped_info.yaml: scrape_groups_info.py profiles.yaml
$(PYTHON) $^ > $@
th-proj-archive.tar.gz: Makefile profiles.yaml scrape_groups_info.py \
- scraped_info.yaml tables.md tables.pdf \
+ profiles_with_scraped_info.yaml tables.md tables.pdf \
threats_by_sector_table.py
tar --transform='s|^|th-proj-archive/|' \
--mtime=1970-01-01T00:00:00-00:00 --group=0 --owner=0 \
-czf $@ $^
clean:
- rm -rf scraped_info.yaml tables.pdf tables.md th-proj-archive.tar.gz
+ rm -rf profiles_with_scraped_info.yaml tables.pdf tables.md \
+ th-proj-archive.tar.gz
.PHONY: clean
magisterclean: clean
diff --git a/scrape_groups_info.py b/scrape_groups_info.py
index 92219db..ac6956b 100644
--- a/scrape_groups_info.py
+++ b/scrape_groups_info.py
@@ -109,28 +109,28 @@ def get_groups_and_techniques(relevant_names):
return groups, all_techniques
-def get_group_names(profiles_path):
- def group_names(inp):
- return {group["name"] for group in yaml.safe_load(inp)["groups"]}
-
+def get_profiles_data(profiles_path):
if profiles_path:
with open(profiles_path) as inp:
- return group_names(inp)
+ return yaml.safe_load(inp)
- return group_names(sys.stdin)
+ return yaml.safe_load(sys.stdin)
if __name__ == "__main__":
- group_names = get_group_names(None if len(sys.argv) < 2 else sys.argv[1])
- groups, techniques = get_groups_and_techniques(group_names)
- missing_names = group_names.difference(groups)
+ profiles_data = get_profiles_data(None if len(sys.argv) < 2
+ else sys.argv[1])
+ group_profiles = dict((g["name"],g) for g in profiles_data["groups"])
+ groups, techniques = get_groups_and_techniques(group_profiles)
+ missing_names = set(group_profiles).difference(groups)
if missing_names:
print(f"No data found for group(s): {', '.join(sorted(missing_names))}",
file=sys.stderr)
- out_obj = {
- "groups": [g.__dict__ for g in groups.values()],
- "techniques": [t.__dict__ for t in techniques.values()]
- }
+ for name, group in groups.items():
+ group_profiles[name].update(group.__dict__)
+
+ profiles_data["groups"] = list(group_profiles.values())
+ profiles_data["techniques"] = [t.__dict__ for t in techniques.values()]
- yaml.safe_dump(out_obj, sys.stdout)
+ yaml.safe_dump(profiles_data, sys.stdout)