Skip to main content

Delete nodes without associated container data

Use the Python script provided below to delete nodes without associated container data. We strongly recommend first running the script in dry-run mode to determine which nodes it'll delete.

Danger

Make sure that the user or service account running the script has the datamodels:read:all capability. The script returns only the containers it has access to. If the script has limited container access, it may delete instances that still have data.

from cognite.client.data_classes.capabilities import DataModelsAcl
from cognite.client.data_classes.data_modeling.instances import InvolvedContainers
from cognite.client.data_classes.filters import MatchAll, SpaceFilter
from cognite.client import CogniteClient



client = CogniteClient()

# Make sure that the principal running the script has the `datamodels:read:all` capability.
# The script returns only the containers it has access to. If the script has limited
# container access, it may delete instances that still have data.

to_check = [
    DataModelsAcl(
        actions=[DataModelsAcl.Action.Read],
        scope=DataModelsAcl.Scope.All(),
    )
]

# Abort before inspecting or deleting anything if the capability check reports
# something as missing; `missing` is included in the message for troubleshooting.
if missing := client.iam.verify_capabilities(to_check):
    raise Exception(f"You don't have the necessary capabilities to scan all containers: {missing}")


# Set dry_run to False to delete the nodes. Set to True to list the nodes without deleting them.
# Keep the default (True) for the first run so you can review what would be deleted.
dry_run = True
print(f"Dry run: {dry_run}")
if dry_run:
    print("This is a dry run. The nodes will be listed, but not deleted.")
else:
    print("This is NOT a dry run. The nodes will be deleted.")


# Replace this filter to scope which nodes are considered for deletion.
# The default, MatchAll(), puts no restriction on the nodes that are listed.
filter = MatchAll()
# You can, for example, use a space filter:
# filter = SpaceFilter("myspace")

# Type nodes must be deleted _after_ you have deleted the instances pointing to them.
# You can use a filter to exclude the space where you store the type nodes.
# NOTE: `Not` is not imported at the top of this script; add it to the
# `cognite.client.data_classes.filters` import before using this example.
# filter = Not(SpaceFilter("my_type_node_space"))

# If you haven't stored your type nodes in a dedicated space and need to filter them out by
# external ID, do that on the client side to prevent the query performance from degrading.

# Walk the matching nodes in chunks of 1000, inspect each chunk for involved
# containers, and list (dry run) or delete the nodes that have none.
deleted_total = 0
for batch in client.data_modeling.instances(instance_type="node", filter=filter, chunk_size=1000):
    result = client.data_modeling.instances.inspect(
        nodes=batch.as_ids(),
        involved_containers=InvolvedContainers(),
    )
    # A node with an empty `involved_containers` inspection result is treated
    # as having no container data.
    nodes_without_container_data = [
        (node.space, node.external_id)
        for node in result.nodes
        if not node.inspection_results.involved_containers
    ]
    if not nodes_without_container_data:
        # Nothing to list or delete in this chunk; skip the empty delete request.
        continue

    if dry_run:
        for space, external_id in nodes_without_container_data:
            print(f"{space}:{external_id}")
    else:
        client.data_modeling.instances.delete(nodes=nodes_without_container_data)
        deleted_total += len(nodes_without_container_data)
        print(f"Deleted {len(nodes_without_container_data)} nodes without container data")

# In dry-run mode nothing is deleted, so this reports 0.
print(f"Deleted a total of {deleted_total} nodes")