Delete nodes without associated container data
Use the Python script provided below to delete nodes without associated container data. We strongly recommend first running the script in dry-run mode to determine which nodes it'll delete.
Caution
Make sure that the user or service account running the script has the datamodels:read:all capability.
The script sees only the containers that this user or service account has access to. If the script has limited container access, it may delete instances that still have data in containers it cannot see.
from cognite.client.data_classes.capabilities import DataModelsAcl
from cognite.client.data_classes.data_modeling.instances import InvolvedContainers
from cognite.client.data_classes.filters import MatchAll, SpaceFilter
from cognite.client import CogniteClient
client = CogniteClient()

# Make sure that the principal running the script has the `datamodels:read:all` capability.
# The inspection returns only the containers the principal has access to. If the script has
# limited container access, it may delete instances that still have data.
to_check = [
    DataModelsAcl(
        actions=[DataModelsAcl.Action.Read],
        scope=DataModelsAcl.Scope.All(),
    )
]
# Abort before touching any data if the required capability is missing.
if missing := client.iam.verify_capabilities(to_check):
    raise Exception(f"You don't have the necessary capabilities to scan all containers: {missing}")

# Set dry_run to False to delete the nodes. Set to True to list the nodes without deleting them.
dry_run = True
print(f"Dry run: {dry_run}")
if dry_run:
    print("This is a dry run. The nodes will be listed, but not deleted.")
else:
    print("This is NOT a dry run. The nodes will be deleted.")

# Replace this filter to scope which nodes to delete.
# (Named `node_filter` so it does not shadow the builtin `filter`.)
node_filter = MatchAll()
# You can, for example, use a space filter:
# node_filter = SpaceFilter("myspace")
# Type nodes must be deleted _after_ you have deleted the instances pointing to them.
# You can use a filter to exclude the space where you store the type nodes
# (this also requires `from cognite.client.data_classes.filters import Not`):
# node_filter = Not(SpaceFilter("my_type_node_space"))
# If you haven't stored your type nodes in a dedicated space and need to filter them out by
# external ID, do that on the client side to prevent the query performance from degrading.

# Number of nodes without container data seen so far: deleted in a real run,
# only listed in a dry run.
matched_total = 0

# Walk through the nodes in batches of up to 1000 and inspect each batch for container data.
for batch in client.data_modeling.instances(instance_type="node", filter=node_filter, chunk_size=1000):
    result = client.data_modeling.instances.inspect(
        nodes=batch.as_ids(),
        involved_containers=InvolvedContainers(),
    )

    # A node with no involved containers has no associated container data.
    nodes_without_container_data = [
        (node.space, node.external_id)
        for node in result.nodes
        if not node.inspection_results.involved_containers
    ]

    # Nothing to list or delete in this batch; skip the delete call entirely.
    if not nodes_without_container_data:
        continue

    matched_total += len(nodes_without_container_data)

    if dry_run:
        for space, external_id in nodes_without_container_data:
            print(f"{space}:{external_id}")
    else:
        client.data_modeling.instances.delete(nodes=nodes_without_container_data)
        print(f"Deleted {len(nodes_without_container_data)} nodes without container data")

# Report what actually happened: a dry run deletes nothing, so it must not claim deletions.
if dry_run:
    print(f"Dry run: {matched_total} nodes without container data would be deleted")
else:
    print(f"Deleted a total of {matched_total} nodes")