Created
November 4, 2021 22:02
-
-
Save NathanHuisman/dd58bb033615b748188094a6a2145633 to your computer and use it in GitHub Desktop.
Code to scan minecraft world for items in tile entities.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A Python script I put together in a couple of minutes (I'm lying, hours)
to go through a Minecraft world and read all the tile entities.
Player inventories are not scanned yet.
License MIT (whatever)
Dependencies are numpy and pynbt
Nathan Huisman, 2021
""" | |
import logging | |
import os | |
import numpy as np | |
import struct | |
import argparse | |
import re | |
import gzip | |
import zlib | |
import io | |
import pynbt | |
from typing import Iterator, NamedTuple | |
from enum import Enum | |
class Coord(NamedTuple):
    """A horizontal (x, z) coordinate pair.

    Used both for region coordinates (parsed from ``r.X.Z.mca`` file names)
    and for chunk coordinates derived from them.
    """

    x: int  # position along the x axis
    z: int  # position along the z axis
class Dimension(Enum):
    """The dimensions of a world that this script scans.

    Each member's value is the name that is printed after the
    ``minecraft:`` prefix in the script's output.
    """

    overworld = 'overworld'
    nether = 'nether'
    the_end = 'the_end'
class Region(NamedTuple):
    """A single region (``.mca``) file discovered in a world folder."""

    # Region coordinate taken from the two numbers in the file name r.X.Z.mca.
    coord: Coord
    # Dimension whose folder the file was found in.
    dimension: Dimension
    # Directory entry of the region file; it is path-like, so it can be opened directly.
    file: os.DirEntry
def get_regions(world: str) -> Iterator[Region]:
    """Yield every region file found in a Minecraft (Java Edition) world folder.

    The overworld keeps its region files in ``<world>/region``, the Nether in
    ``<world>/DIM-1/region`` and the End in ``<world>/DIM1/region``.

    Args:
        world: Path to the world save folder.

    Yields:
        One ``Region`` per file whose name matches ``r.<x>.<z>.mca``.

    Raises:
        Exception: If the overworld region folder cannot be opened, or if a
            Nether/End region folder exists but cannot be read. A Nether or
            End folder that simply does not exist is skipped, because a world
            whose other dimensions were never saved has no such folder.
    """
    dimension_dirs = [
        (Dimension.overworld, 'region'),
        (Dimension.nether, os.path.join('DIM-1', 'region')),
        (Dimension.the_end, os.path.join('DIM1', 'region')),
    ]
    region_regex = re.compile(r'^r\.(-?[0-9]+)\.(-?[0-9]+)\.mca$')

    for dimension, subdir in dimension_dirs:
        try:
            entries = os.scandir(os.path.join(world, subdir))
        except OSError as e:
            optional = dimension is not Dimension.overworld
            if optional and isinstance(e, FileNotFoundError):
                # This dimension has never been saved; nothing to scan.
                continue
            raise Exception('Cannot open minecraft world, is it corrupt or did you specify the wrong path?') from e

        # The context manager closes the directory handle even if the
        # consumer stops iterating early.
        with entries:
            for rfile in entries:
                m = region_regex.match(rfile.name)
                if not m:
                    # Not a region file (stray file or sub-folder); ignore it.
                    continue
                coord = Coord(int(m.group(1)), int(m.group(2)))
                yield Region(coord, dimension, rfile)
class Chunk(NamedTuple):
    """One chunk read out of a region file."""

    # Chunk coordinate: region coordinate * 32 plus the chunk's slot inside the region.
    coord: Coord
    # Dimension of the region the chunk was read from.
    dimension: Dimension
    # Chunk payload after decompression (raw NBT bytes, ready for an NBT parser).
    data: bytes
def get_chunks(region: Region) -> Iterator[Chunk]:
    """Yield every chunk stored in a region file, decompressed.

    The file starts with a table of 1024 big-endian 32-bit entries, one per
    chunk slot. The upper three bytes of an entry are the chunk's offset in
    4096-byte sectors (0 means the slot is empty); the low byte is discarded
    here. Each chunk begins with a 5-byte header: a 4-byte big-endian length
    (which counts the compression-type byte) followed by the compression type.

    Args:
        region: The region whose file should be read.

    Yields:
        One ``Chunk`` per non-empty slot, in table order.

    Raises:
        Exception: If a chunk declares a length of zero or an unknown
            compression type.
        struct.error: If a chunk header is truncated.
    """
    # Compression type byte -> function turning the stored payload into raw
    # NBT bytes. Types 1 (gzip) and 3 (uncompressed) are rarely used in practice.
    decompressors = {
        1: gzip.decompress,
        2: zlib.decompress,
        3: lambda raw: raw,
    }

    with open(region.file, mode='rb') as region_file:
        loclist = np.fromfile(region_file, count=1024, dtype='>u4')
        # Convert to plain Python ints: a numpy uint32 scalar would wrap
        # around in `off * 4096` for sector offsets of 2**20 or more.
        offsets = (loclist >> 8).tolist()
        for n, off in enumerate(offsets):
            if off == 0:
                continue
            # Slot n sits at local position (n % 32, n // 32) inside the region.
            chunk_coord = Coord(region.coord.x * 32 + (n & 31), region.coord.z * 32 + (n >> 5))
            region_file.seek(off * 4096)
            chunk_header = region_file.read(5)
            compressed_length, compression_type = struct.unpack('>IB', chunk_header)
            if compressed_length < 1:
                # A zero length would turn the read below into read(-1),
                # which slurps the rest of the file.
                raise Exception("Chunk length invalid", chunk_coord, region)
            # The length field includes the compression-type byte already read.
            data = region_file.read(compressed_length - 1)
            decompress = decompressors.get(compression_type)
            if decompress is None:
                raise Exception("Chunk compression type unknown", chunk_coord, region)
            yield Chunk(chunk_coord, region.dimension, decompress(data))
def _count_items(items, totals):
    """Add the stack size of every item in *items* to *totals*, keyed by item id.

    Items that themselves carry an inventory (``tag.BlockEntityTag.Items``,
    e.g. a filled shulker box) are descended into recursively, so containers
    nested to any depth are counted.
    """
    for item in items:
        item_id = item['id'].value
        totals[item_id] = totals.get(item_id, 0) + item['Count'].value
        if 'tag' in item and 'BlockEntityTag' in item['tag'] and 'Items' in item['tag']['BlockEntityTag']:
            _count_items(item['tag']['BlockEntityTag']['Items'], totals)


def _get_tile_entities(nbt):
    """Return the list of tile (block) entities stored in a chunk's NBT root.

    Chunks saved before Minecraft 1.18 keep them under ``Level.TileEntities``;
    from 1.18 on the ``Level`` compound is gone and the list lives at the
    root as ``block_entities``.
    """
    if 'Level' in nbt:
        return nbt['Level']['TileEntities']
    return nbt['block_entities']


def main():
    """Scan a world given on the command line and print its container contents.

    For every tile entity that has an ``Items`` list, one CSV-style line is
    printed per distinct item id:
    ``<tile entity id>,minecraft:<dimension>,<x>,<y>,<z>,<item id>,<count>``.
    """
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(description='Extract all inventories from a Minecraft save (Java Edition)')
    parser.add_argument('world', help='The world to scan for inventories')
    # TODO: Add logging verbosity option
    ns = parser.parse_args()

    regions = list(get_regions(ns.world))
    # Upper bound used only for the progress message: a region holds at most
    # 1024 chunks, though most hold fewer.
    chunks_to_scan = len(regions) * 1024

    for n, region in enumerate(regions):
        logging.debug(f'Found region {region.file.name} @ {region.coord}', extra={'region': region})
        for m, chunk in enumerate(get_chunks(region)):
            nbt = pynbt.NBTFile(io.BytesIO(chunk.data))
            tile_entities = _get_tile_entities(nbt)
            chunks_scanned = n * 1024 + m + 1
            logging.debug(f'Found chunk @ {chunk.coord} with uncompressed size {len(chunk.data)} and {len(tile_entities)} tile entities', extra={'region':region, 'chunk':chunk})
            logging.debug(f'Scanning chunk {chunks_scanned}/{chunks_to_scan} ({100*chunks_scanned/chunks_to_scan:.1f}%)')

            for tile_entity in tile_entities:
                if 'Items' not in tile_entity:
                    # Not a container; nothing would be printed for it.
                    continue
                items_found = {}
                _count_items(tile_entity['Items'], items_found)
                x, y, z = [tile_entity[axis].value for axis in ['x', 'y', 'z']]
                for item_id, count in items_found.items():
                    print(f'{tile_entity["id"].value},minecraft:{chunk.dimension.value},{x},{y},{z},{item_id},{count}')
# Run the scan only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment