Last active
January 17, 2026 22:12
-
-
Save lmmx/1053c35327c72dab07ec25bc736fe1bb to your computer and use it in GitHub Desktop.
Eliminating a Cartesian product in Polars dataframe handling https://cog.spin.systems/wikidata-digging-avoiding-combinatorial-explosion
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Explode the Wikidata claims column and inspect the first flattened row's schema."""
from pathlib import Path

import polars as pl

claims_file = Path("results/claims/chunk_0-00400-of-00546.parquet")

# Lazy scan: only the single row we ask for gets materialised.
scan = pl.scan_parquet(claims_file)

flattened = (
    scan.select(pl.col("claims").explode().struct.unnest())
    .drop("key")
    .explode("value")      # one row per individual claim statement
    .unnest("value")
    .unnest("mainsnak")    # lift snak fields (datatype, datavalue, ...) to columns
)

base = flattened.head(1).collect()

print(base)
print()
print(base.schema)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Demonstrate the cartesian blow-up from exploding both label lists independently."""
from pathlib import Path

import polars as pl

claims_file = Path("results/claims/chunk_0-00400-of-00546.parquet")
WIKIBASE_TYPES = ["wikibase-item", "wikibase-property"]

lf = pl.scan_parquet(claims_file)

# Take a single wikibase-item/property claim to work with.
base = (
    lf.select(pl.col("claims").explode().struct.unnest())
    .drop("key")
    .explode("value")
    .unnest("value")
    .unnest("mainsnak")
    .filter(pl.col("datatype").is_in(WIKIBASE_TYPES))
    .head(1)
    .collect()
)

# First explode the property-label list ...
prop_exploded = (
    base.lazy()
    .explode("property-labels")
    .with_columns(
        pl.col("property-labels").struct.rename_fields(
            ["prop_lang", "property_label"]
        )
    )
    .unnest("property-labels")
)

# ... then explode the datavalue label list on top of it: every pairing of
# property language x datavalue language becomes its own row.
cartesian = (
    prop_exploded.with_columns(
        pl.col("datavalue").struct.field("labels").alias("dv_labels")
    )
    .explode("dv_labels")
    .with_columns(
        pl.col("dv_labels").struct.rename_fields(["dv_lang", "datavalue_label"])
    )
    .unnest("dv_labels")
    .select(["prop_lang", "dv_lang", "property_label", "datavalue_label"])
    .collect()
)

print(cartesian)
print(f"\nrows produced: {cartesian.height}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Extract the language keys present in datavalue.labels without exploding rows."""
from pathlib import Path

import polars as pl

claims_file = Path("results/claims/chunk_0-00400-of-00546.parquet")

lf = pl.scan_parquet(claims_file)

# list.eval maps over each label struct in place, so there is no row multiplication.
lang_keys = (
    pl.col("datavalue")
    .struct.field("labels")
    .list.eval(pl.element().struct.field("key"))
    .alias("_dv_langs")
)

df = (
    lf.select(pl.col("claims").explode().struct.unnest())
    .drop("key")
    .explode("value")
    .unnest("value")
    .unnest("mainsnak")
    .head(3)
    .with_columns(lang_keys)
    .select(["property", "_dv_langs"])
    .collect()
)

print(df)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Keep only property-label languages that also appear in datavalue.labels."""
from pathlib import Path

import polars as pl

claims_file = Path("results/claims/chunk_0-00400-of-00546.parquet")
WIKIBASE_TYPES = ["wikibase-item", "wikibase-property"]

lf = pl.scan_parquet(claims_file)

# One wikibase claim, still lazy.
base = (
    lf.select(pl.col("claims").explode().struct.unnest())
    .drop("key")
    .explode("value")
    .unnest("value")
    .unnest("mainsnak")
    .filter(pl.col("datatype").is_in(WIKIBASE_TYPES))
    .head(1)
)

# Gather the datavalue label languages into a plain list column, so we can
# filter the exploded property labels against it instead of exploding twice.
dv_langs = (
    pl.col("datavalue")
    .struct.field("labels")
    .list.eval(pl.element().struct.field("key"))
    .alias("_dv_langs")
)

efficient_stage = (
    base.with_columns(dv_langs)
    .explode("property-labels")
    .with_columns(
        pl.col("property-labels").struct.rename_fields(
            ["language", "property_label"]
        )
    )
    .unnest("property-labels")
    .filter(pl.col("language").is_in(pl.col("_dv_langs")))
    .select(["language", "property_label"])
    .collect()
)

print(efficient_stage)
print(f"\nrows kept: {efficient_stage.height}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Compare performance of cartesian vs efficient approach."""
# stdlib imports first, third-party after, per convention.
import time
from pathlib import Path

import polars as pl

claims_file = Path("results/claims/chunk_0-00400-of-00546.parquet")
WIKIBASE_TYPES = ["wikibase-item", "wikibase-property"]
def get_base(lf: pl.LazyFrame, n: int) -> pl.DataFrame:
    """Materialise the first *n* flattened claim rows from the lazy scan."""
    flattened = (
        lf.select(pl.col("claims").explode().struct.unnest())
        .drop("key")
        .explode("value")
        .unnest("value")
        .unnest("mainsnak")
    )
    return flattened.head(n).collect()
def transform_cartesian(base: pl.DataFrame) -> pl.DataFrame:
    """Baseline approach: build the full cartesian product, then filter matches."""
    # Restrict to wikibase claims before any exploding.
    wb = base.lazy().filter(pl.col("datatype").is_in(WIKIBASE_TYPES))

    # Explode the property-label list into one row per language.
    prop_labels = (
        wb.explode("property-labels")
        .with_columns(
            pl.col("property-labels").struct.rename_fields(
                ["prop_lang", "property_label"]
            )
        )
        .unnest("property-labels")
    )

    # Explode the datavalue-label list on top, producing the cross product.
    both_labels = (
        prop_labels.with_columns(
            pl.col("datavalue").struct.field("labels").alias("dv_labels")
        )
        .explode("dv_labels")
        .with_columns(
            pl.col("dv_labels").struct.rename_fields(["dv_lang", "datavalue_label"])
        )
        .unnest("dv_labels")
    )

    # Keep only the diagonal where the two languages agree.
    return (
        both_labels.filter(pl.col("prop_lang") == pl.col("dv_lang"))
        .rename({"prop_lang": "language"})
        .drop("dv_lang")
        .collect()
    )
def transform_efficient(base: pl.DataFrame) -> pl.DataFrame:
    """Filter-first approach: never explode datavalue.labels at all."""

    def _matching_label(row: dict):
        # Linear scan of the (small) per-row label list for this row's language;
        # returns None when no label exists for it.
        for entry in row["datavalue"]["labels"] or []:
            if entry["key"] == row["language"]:
                return entry["value"]
        return None

    return (
        base.lazy()
        .filter(pl.col("datatype").is_in(WIKIBASE_TYPES))
        # Languages available in datavalue.labels, as a plain list column.
        .with_columns(
            pl.col("datavalue")
            .struct.field("labels")
            .list.eval(pl.element().struct.field("key"))
            .alias("_dv_langs")
        )
        # Explode only the property-labels side.
        .explode("property-labels")
        .with_columns(
            pl.col("property-labels").struct.rename_fields(
                ["language", "property_label"]
            )
        )
        .unnest("property-labels")
        # Drop languages that have no datavalue label before the lookup.
        .filter(pl.col("language").is_in(pl.col("_dv_langs")))
        # Per-row lookup of the matching label (Python fallback, no explode).
        .with_columns(
            pl.struct(["datavalue", "language"])
            .map_elements(_matching_label, return_dtype=pl.String)
            .alias("datavalue_label")
        )
        .drop("_dv_langs")
        .collect()
    )
# Exercise both implementations on growing sample sizes and compare timings.
lf = pl.scan_parquet(claims_file)
for n in [10, 50, 100, 500]:
    banner = "=" * 60
    print(f"\n{banner}")
    print(f"Testing with {n} base claims")
    print(banner)

    base = get_base(lf, n)
    wb_count = base.filter(pl.col("datatype").is_in(WIKIBASE_TYPES)).height
    print(f" ({wb_count} are wikibase-item/property)")

    start = time.perf_counter()
    r1 = transform_cartesian(base)
    elapsed_cartesian = time.perf_counter() - start
    print(f"Cartesian: {elapsed_cartesian:.3f}s, {len(r1)} rows")

    start = time.perf_counter()
    r2 = transform_efficient(base)
    elapsed_efficient = time.perf_counter() - start
    print(f"Efficient: {elapsed_efficient:.3f}s, {len(r2)} rows")

    # Both approaches must agree on the number of result rows.
    assert len(r1) == len(r2), f"Row count mismatch: {len(r1)} vs {len(r2)}"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Benchmark map_elements vs join-based lookup for label extraction."""
# stdlib imports first, third-party after, per convention.
import time
from pathlib import Path

import polars as pl

claims_file = Path("results/claims/chunk_0-00400-of-00546.parquet")
WIKIBASE_TYPES = ["wikibase-item", "wikibase-property"]
def get_base(n: int | None = None) -> pl.LazyFrame:
    """Get base claims data, optionally limited to n rows.

    Args:
        n: Maximum number of claim rows to keep; ``None`` means no limit.

    Returns:
        A lazy frame of flattened wikibase-item/property claims.
    """
    lf = (
        pl.scan_parquet(claims_file)
        .select(pl.col("claims").explode().struct.unnest())
        .drop("key")
        .explode("value")
        .unnest("value")
        .unnest("mainsnak")
        .filter(pl.col("datatype").is_in(WIKIBASE_TYPES))
    )
    # BUG FIX: `if n:` treated an explicit n=0 as "no limit" and returned the
    # whole dataset; test identity against None so 0 yields an empty frame.
    if n is not None:
        lf = lf.head(n)
    return lf
def transform_map_elements(base: pl.LazyFrame) -> pl.LazyFrame:
    """Per-row Python lookup via map_elements (slow, no parallelization)."""

    def _label_for_language(row: dict):
        # Scan this row's label list for an entry matching its language.
        for entry in row["datavalue"]["labels"] or []:
            if entry["key"] == row["language"]:
                return entry["value"]
        return None

    with_langs = base.with_columns(
        pl.col("datavalue")
        .struct.field("labels")
        .list.eval(pl.element().struct.field("key"))
        .alias("dv_langs")
    )

    per_language = (
        with_langs.explode("property-labels")
        .with_columns(
            pl.col("property-labels").struct.rename_fields(
                ["language", "property_label"]
            )
        )
        .unnest("property-labels")
        .filter(pl.col("language").is_in(pl.col("dv_langs")))
    )

    return per_language.with_columns(
        pl.struct(["datavalue", "language"])
        .map_elements(_label_for_language, return_dtype=pl.String)
        .alias("datavalue_label")
    ).drop("dv_langs")
def transform_join_lookup(base: pl.LazyFrame) -> pl.LazyFrame:
    """Native Polars approach: long-format label lookup joined back per row."""
    # A row index re-associates exploded labels with their originating claim.
    indexed = base.with_row_index("_row_id")

    # Lookup table in long format: (row, language) -> datavalue label.
    labels_long = indexed.select(
        "_row_id",
        pl.col("datavalue").struct.field("labels").alias("dv_labels"),
    ).explode("dv_labels")
    labels_lookup = labels_long.with_columns(
        pl.col("dv_labels").struct.field("key").alias("_lang"),
        pl.col("dv_labels").struct.field("value").alias("datavalue_label"),
    ).select("_row_id", "_lang", "datavalue_label")

    # Property labels exploded to one row per language.
    renamed_fields = pl.col("property-labels").struct.rename_fields(
        ["language", "property_label"]
    )
    main = (
        indexed.explode("property-labels")
        .with_columns(renamed_fields)
        .unnest("property-labels")
    )

    # Inner join keeps only languages present on both sides — the filtering
    # falls out of the join semantics for free.
    joined = main.join(
        labels_lookup,
        left_on=["_row_id", "language"],
        right_on=["_row_id", "_lang"],
        how="inner",
    )
    return joined.drop("_row_id")
# Benchmark both lookup strategies across sample sizes (None = whole file).
for n in [1, 5, 10, 50, 500, None]:
    print(f"\n{'='*60}")
    print(f"Testing with {n} claims")
    print("=" * 60)
    base = get_base(n)
    count = base.select(pl.len()).collect().item()
    print(f" Wikibase claims: {count}")

    # map_elements approach
    t0 = time.perf_counter()
    r1 = transform_map_elements(base).collect()
    t1 = time.perf_counter()
    map_time = t1 - t0
    print(f" map_elements: {map_time:.3f}s, {len(r1)} rows")

    # Join lookup approach
    t0 = time.perf_counter()
    r2 = transform_join_lookup(base).collect()
    t1 = time.perf_counter()
    join_time = t1 - t0
    print(f" join_lookup: {join_time:.3f}s, {len(r2)} rows")

    speedup = map_time / join_time if join_time > 0 else float("inf")
    print(f" Speedup: {speedup:.1f}x")

    # Verify results match
    if len(r1) == len(r2):
        print(" ✓ Row counts match")
        cols = ["property", "language", "property_label", "datavalue_label"]
        # BUG FIX: sorting by only the first two columns leaves tie order
        # unspecified — the two pipelines can emit identical rows in different
        # orders within a (property, language) group, so head(10) could differ
        # and report a spurious mismatch. Sort by every compared column.
        r1_check = r1.sort(cols).select(cols).head(10)
        r2_check = r2.sort(cols).select(cols).head(10)
        if r1_check.equals(r2_check):
            print(" ✓ Values match")
        else:
            # f-prefixes removed from placeholder-free strings (ruff F541).
            print(" ⚠️ Values differ!")
            print("map_elements:")
            print(r1_check)
            print("join_lookup:")
            print(r2_check)
    else:
        print(f" ⚠️ Row count mismatch: {len(r1)} vs {len(r2)}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ============================================================ | |
| Testing with 1 claims | |
| ============================================================ | |
| Wikibase claims: 1 | |
| map_elements: 1.162s, 158 rows | |
| join_lookup: 1.078s, 158 rows | |
| Speedup: 1.1x | |
| ✓ Row counts match | |
| ✓ Values match | |
| ============================================================ | |
| Testing with 5 claims | |
| ============================================================ | |
| Wikibase claims: 5 | |
| map_elements: 1.177s, 533 rows | |
| join_lookup: 1.143s, 533 rows | |
| Speedup: 1.0x | |
| ✓ Row counts match | |
| ✓ Values match | |
| ============================================================ | |
| Testing with 10 claims | |
| ============================================================ | |
| Wikibase claims: 10 | |
| map_elements: 1.222s, 741 rows | |
| join_lookup: 1.130s, 741 rows | |
| Speedup: 1.1x | |
| ✓ Row counts match | |
| ✓ Values match | |
| ============================================================ | |
| Testing with 50 claims | |
| ============================================================ | |
| Wikibase claims: 50 | |
| map_elements: 1.452s, 3839 rows | |
| join_lookup: 1.223s, 3839 rows | |
| Speedup: 1.2x | |
| ✓ Row counts match | |
| ✓ Values match | |
| ============================================================ | |
| Testing with 500 claims | |
| ============================================================ | |
| Wikibase claims: 500 | |
| map_elements: 4.636s, 41417 rows | |
| join_lookup: 2.155s, 41417 rows | |
| Speedup: 2.2x | |
| ✓ Row counts match | |
| ✓ Values match | |
| ============================================================ | |
| Testing with None claims | |
| ============================================================ | |
| Wikibase claims: 6180 | |
| map_elements: 44.759s, 558850 rows | |
| join_lookup: 10.567s, 558850 rows | |
| Speedup: 4.2x | |
| ✓ Row counts match | |
| ✓ Values match |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| (wikidata) louis 🌟 ~/lab/wikidata $ time python debug_claims5.py | |
| ============================================================ | |
| Testing with 10 base claims | |
| ============================================================ | |
| (4 are wikibase-item/property) | |
| Cartesian: 3.361s, 395 rows | |
| Efficient: 0.070s, 395 rows | |
| ============================================================ | |
| Testing with 50 base claims | |
| ============================================================ | |
| (28 are wikibase-item/property) | |
| Cartesian: 10.071s, 1993 rows | |
| Efficient: 0.258s, 1993 rows | |
| ============================================================ | |
| Testing with 100 base claims | |
| ============================================================ | |
| (47 are wikibase-item/property) | |
| Cartesian: 16.785s, 3717 rows | |
| Efficient: 0.334s, 3717 rows | |
| ============================================================ | |
| Testing with 500 base claims | |
| ============================================================ | |
| (159 are wikibase-item/property) | |
| Cartesian: 41.403s, 14579 rows | |
| Efficient: 1.602s, 14579 rows | |
| real 1m23.240s | |
| user 1m10.232s | |
| sys 0m38.544s |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment