Skip to content

Instantly share code, notes, and snippets.

@NathanHuisman
Created November 4, 2021 22:02
Show Gist options
  • Save NathanHuisman/dd58bb033615b748188094a6a2145633 to your computer and use it in GitHub Desktop.
Save NathanHuisman/dd58bb033615b748188094a6a2145633 to your computer and use it in GitHub Desktop.
Code to scan minecraft world for items in tile entities.
"""
A Python script I put together in a couple of minutes (I'm lying, hours)
to go through a Minecraft world and read all the tile entities.
Player inventories are not scanned yet.
License MIT (whatever)
Dependencies are numpy and pynbt
Nathan Huisman, 2021
"""
import logging
import os
import numpy as np
import struct
import argparse
import re
import gzip
import zlib
import io
import pynbt
from typing import Iterator, NamedTuple
from enum import Enum
class Coord(NamedTuple):
    """An (x, z) coordinate pair, used for both region and chunk positions."""

    x: int  # position along the x axis
    z: int  # position along the z axis
class Dimension(Enum):
    """The dimensions a region or chunk can belong to.

    The member value is the name that ends up in the program output, where
    it is prefixed with ``minecraft:``.
    """

    overworld = "overworld"
    nether = "nether"
    the_end = "the_end"
class Region(NamedTuple):
    """A single region file together with its position and dimension."""

    coord: Coord  # region coordinates, taken from the ``r.<x>.<z>.mca`` name
    dimension: Dimension  # dimension whose folder the file was found in
    file: os.DirEntry  # directory entry of the region file itself
def get_regions(world: str) -> Iterator[Region]:
    """Yield every region file found in a Java Edition world folder.

    The overworld, Nether and End region folders are scanned in that order.
    Files whose name does not look like ``r.<x>.<z>.mca`` are ignored.

    Args:
        world: Path to the world (save) directory.

    Yields:
        One ``Region`` per matching file, carrying the region coordinates
        parsed from the file name, the dimension and the directory entry.

    Raises:
        Exception: If the overworld region folder is missing, or any region
            folder cannot be read. The underlying ``OSError`` is chained.
    """
    # Region folders relative to the world root. Java Edition stores the
    # Nether under DIM-1 and the End under DIM1, each with its own
    # "region" subfolder holding the .mca files.
    region_dirs = [
        (Dimension.overworld, 'region'),
        (Dimension.nether, os.path.join('DIM-1', 'region')),
        (Dimension.the_end, os.path.join('DIM1', 'region')),
    ]
    region_regex = re.compile(r'^r\.(-?[0-9]+)\.(-?[0-9]+)\.mca$')
    for dimension, subdir in region_dirs:
        try:
            entries = os.scandir(os.path.join(world, subdir))
        except OSError as e:
            # A world whose Nether/End was never generated simply has no
            # region folder for it; that is not an error. A missing
            # overworld folder means this is not a usable world.
            if isinstance(e, FileNotFoundError) and dimension is not Dimension.overworld:
                continue
            raise Exception('Cannot open minecraft world, is it corrupt or did you specify the wrong path?') from e
        # The context manager closes the directory handle even when the
        # consumer stops iterating early.
        with entries:
            for rfile in entries:
                m = region_regex.match(rfile.name)
                if not m:
                    # Anything that is not a region file is skipped.
                    continue
                coord = Coord(int(m.group(1)), int(m.group(2)))
                yield Region(coord, dimension, rfile)
class Chunk(NamedTuple):
    """One chunk read from a region file."""

    coord: Coord  # chunk coordinates (region coordinate * 32 + local index)
    dimension: Dimension  # dimension inherited from the containing region
    data: bytes  # chunk payload after decompression
def get_chunks(region: Region) -> Iterator[Chunk]:
    """Yield the decompressed chunks stored in one region file.

    The first 1024 big-endian 32-bit words of the file are read as the
    location table; the upper 24 bits of each word are the chunk's offset,
    which is multiplied by 4096 to get its byte position. Entries with an
    offset of zero are skipped.

    Args:
        region: The region whose file should be read.

    Yields:
        A ``Chunk`` with absolute chunk coordinates, the region's dimension
        and the decompressed payload.

    Raises:
        Exception: If a chunk declares a compression type other than
            1 (gzip), 2 (zlib) or 3 (stored as-is).
    """
    sector_size = 4096
    # Compression type byte -> function turning the stored bytes into the
    # raw payload.
    decompressors = {
        1: gzip.decompress,
        2: zlib.decompress,
        3: lambda raw: raw,
    }
    with open(region.file, mode='rb') as handle:
        locations = np.fromfile(handle, count=1024, dtype='>u4')
        for index, sector in enumerate(locations >> 8):
            if sector == 0:
                continue
            # The table index encodes the chunk's position inside the
            # region: low five bits are x, the remaining bits are z.
            local_z, local_x = divmod(index, 32)
            position = Coord(
                region.coord.x * 32 + local_x,
                region.coord.z * 32 + local_z,
            )
            handle.seek(sector * sector_size)
            # Chunk header: 4-byte length followed by a 1-byte compression
            # type; the length counts the type byte, hence the "- 1".
            length, scheme = struct.unpack('>IB', handle.read(5))
            payload = handle.read(length - 1)
            decompress = decompressors.get(scheme)
            if decompress is None:
                raise Exception("Chunk compression type unknown", position, region)
            yield Chunk(position, region.dimension, decompress(payload))
def _tile_entities_of(nbt):
    """Return the tile-entity list of a parsed chunk, or ``[]`` if it has none."""
    # Older chunks wrap everything in a "Level" compound and call the list
    # "TileEntities"; chunks saved by 1.18+ keep a "block_entities" list at
    # the root instead. Both layouts are accepted.
    level = nbt['Level'] if 'Level' in nbt else nbt
    for key in ('TileEntities', 'block_entities'):
        if key in level:
            return level[key]
    return []


def _tally_items(items, totals):
    """Add the count of every item in ``items`` to ``totals``, recursing into containers."""
    for item in items:
        item_id = item['id'].value
        totals[item_id] = totals.get(item_id, 0) + item['Count'].value
        # An item that is itself a container (e.g. a shulker box) carries
        # its contents under tag -> BlockEntityTag -> Items; follow that to
        # any depth instead of stopping after one level.
        if 'tag' in item and 'BlockEntityTag' in item['tag'] and 'Items' in item['tag']['BlockEntityTag']:
            _tally_items(item['tag']['BlockEntityTag']['Items'], totals)


def main():
    """Scan a world for items stored in tile entities and print them as CSV.

    The world path is taken from the command line. For every tile entity
    that holds items, one line per distinct item id is written to stdout:

        <tile entity id>,minecraft:<dimension>,<x>,<y>,<z>,<item id>,<count>

    Items are expected to use the ``id`` / ``Count`` / ``tag`` layout; items
    stored inside container items are included in the tile entity's totals.
    """
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(description='Extract all inventories from a Minecraft save (Java Edition)')
    parser.add_argument('world', help='The world to scan for inventories')
    # TODO: Add logging verbosity option
    ns = parser.parse_args()
    regions = list(get_regions(ns.world))
    # Upper bound used for the progress figure: 1024 chunk slots per region.
    chunks_to_scan = len(regions) * 1024
    for n, region in enumerate(regions):
        logging.debug('Found region %s @ %s', region.file.name, region.coord, extra={'region': region})
        for m, chunk in enumerate(get_chunks(region)):
            nbt = pynbt.NBTFile(io.BytesIO(chunk.data))
            tile_entities = _tile_entities_of(nbt)
            chunks_scanned = n * 1024 + m + 1
            logging.debug(
                'Found chunk @ %s with uncompressed size %d and %d tile entities',
                chunk.coord, len(chunk.data), len(tile_entities),
                extra={'region': region, 'chunk': chunk},
            )
            logging.debug(
                'Scanning chunk %d/%d (%.1f%%)',
                chunks_scanned, chunks_to_scan, 100 * chunks_scanned / chunks_to_scan,
            )
            for tile_entity in tile_entities:
                items_found = {}
                if 'Items' in tile_entity:
                    _tally_items(tile_entity['Items'], items_found)
                x, y, z = [tile_entity[axis].value for axis in ['x', 'y', 'z']]
                for item_id, count in items_found.items():
                    print(f'{tile_entity["id"].value},minecraft:{chunk.dimension.value},{x},{y},{z},{item_id},{count}')
# Run the scan only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment