doesthismatter

🧩 Syntax:
from elasticsearch import Elasticsearch

# Count the distinct source IPs in the "netflow" index, and the documents
# behind them, by paging through a composite aggregation on source_ip.
client = Elasticsearch("http://localhost:9200")
query_size = 1000

ips = set()
total = 0
querycount = 0
after_key = None  # no cursor yet: the first request starts from the beginning

while True:
    composite = {
        "size": query_size,
        "sources": [
            {"ip": {"terms": {"field": "source_ip"}}}
        ],
    }
    # Only send "after" once a previous page has handed us a cursor.
    if after_key is not None:
        composite["after"] = after_key

    res = client.search(
        index="netflow",
        size=0,
        aggs={"ips": {"composite": composite}},
    )
    querycount += 1

    ips_agg = res["aggregations"]["ips"]
    buckets = ips_agg["buckets"]

    for bucky in buckets:
        total += bucky["doc_count"]
        ips.add(bucky["key"]["ip"])

    # A page with no buckets carries no "after_key", so read it defensively
    # instead of indexing (which raised KeyError on an empty or exhausted
    # result set).
    after_key = ips_agg.get("after_key")

    # A short page, or a missing cursor, means there is nothing left to fetch.
    if len(buckets) < query_size or after_key is None:
        print("final bucky size:", len(buckets))
        break

print("total documents:", total)
print("total ips:", len(ips))
print("query count:", querycount)

from elasticsearch import Elasticsearch

# Count "netflow" documents per CIDR block listed in cidrs.txt, using an
# ip_range aggregation on the source_ip field.
with open("cidrs.txt", "r") as f:
    # One CIDR per line. Blank lines are dropped so that an empty mask,
    # which is not a valid CIDR, is never sent to Elasticsearch.
    stripped_lines = (line.strip() for line in f)
    cidrs = [cidr for cidr in stripped_lines if cidr]

# Each range is expressed as a CIDR mask rather than explicit from/to bounds.
ranges = [{"mask": cidr} for cidr in cidrs]

client = Elasticsearch("http://localhost:9200")

res = client.search(
    index="netflow",
    size=0,  # only the aggregation result is needed, not the matching hits
    aggs={
        "cidr_hits": {
            "ip_range": {
                "field": "source_ip",
                "ranges": ranges,
            }
        }
    },
)

print(res["aggregations"]["cidr_hits"]["buckets"])