Quickly set up the environment
# Fetch the v1.0 branch of the Nebula Graph docker-compose files
git clone --branch v1.0 https://github.com/vesoft-inc/nebula-docker-compose.git
cd nebula-docker-compose
# Start the services in the background (detached)
docker-compose up -d
# Open a throwaway console container on the host network, logging in as root to 127.0.0.1:3699
docker run --rm -ti --network=host vesoft/nebula-console:v1.2.1 -u root -p nebula --addr=127.0.0.1 --port=3699
Prepare data
# ngql
# https://docs.nebula-graph.com.cn/1.2.0/manual-CN/2.query-language/4.statement-syntax/1.data-definition-statements/create-space-syntax/
CREATE SPACE test(partition_num=1, replica_factor=1);
USE test;
CREATE TAG course(name string, credits int);
INSERT VERTEX course (name, credits) VALUES 100:("cs", 12);
Scan data
nGQL
(root@nebula) [test]> FETCH PROP ON * 100;
===========================================
| VertexID | course.name | course.credits |
===========================================
| 100 | cs | 12 |
-------------------------------------------
Got 1 rows (Time spent: 7.981/9.31 ms)
StorageClient
ref: https://github.com/vesoft-inc/nebula-python/blob/v1.0/examples/StorageClientExample.py
We can see that the data is stored in storaged1 (172.28.2.2, the only host holding a partition of `test`):
(root@nebula) [test]> show hosts
=============================================================================================
| Ip | Port | Status | Leader count | Leader distribution | Partition distribution |
=============================================================================================
| 172.28.2.1 | 44500 | online | 0 | No valid partition | No valid partition |
---------------------------------------------------------------------------------------------
| 172.28.2.2 | 44500 | online | 1 | test: 1 | test: 1 |
---------------------------------------------------------------------------------------------
| 172.28.2.3 | 44500 | online | 0 | No valid partition | No valid partition |
---------------------------------------------------------------------------------------------
| Total | | | 1 | test: 1 | test: 1 |
---------------------------------------------------------------------------------------------
Got 4 rows (Time spent: 1.349/2.74 ms)
The storaged1 service definition from the compose file:
# storaged1 service definition; its local_ip (172.28.2.2) is the host that
# SHOW HOSTS reports as holding partition 1 of space `test`.
storaged1:
  image: vesoft/nebula-storaged:v1
  environment:
    USER: root
    TZ: "${TZ}"
  command:
    - --meta_server_addrs=172.28.1.1:45500,172.28.1.2:45500,172.28.1.3:45500
    - --local_ip=172.28.2.2
    - --ws_ip=172.28.2.2
    - --port=44500
    - --data_path=/data/storage
    - --log_dir=/logs
    - --v=0
    - --minloglevel=0
  depends_on:
    - metad0
    - metad1
    - metad2
  healthcheck:
    test: ["CMD", "curl", "-f", "http://172.28.2.2:12000/status"]
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 20s
  ports:
    # HOST:CONTAINER - the storage port 44500 is published on host port 44501
    - "44501:44500"
    - 12000
    - 12002
# python3 -m pip install nebula-python
# Address of the metad service this script connects to.
# NOTE(review): presumably a metad port published to the host by the compose
# file - confirm against the metad service definitions.
meta_host, meta_port = "127.0.0.1", 45502

# Initialize a MetaClient and connect to metad.
# NOTE(review): 3000 and 3 are presumably the timeout (ms) and retry count,
# as for StorageClient below - confirm.
meta_client = MetaClient([(meta_host, meta_port)], 3000, 3)
code = meta_client.connect()
if code == ErrorCode.E_FAIL_TO_CONNECT:
    # Report the address actually used; sys.argv is not populated with a
    # host/port in an interactive session and would raise IndexError here,
    # hiding the real connection failure.
    raise Exception('connect to %s:%s failed' % (meta_host, meta_port))

# initialize a StorageClient. Set its connection timeout to 3000ms, retry times to 3
storage_client = StorageClient(meta_client, 3000, 3)

# initialize a ScanVertexProcessor to process scanned vertex data
scan_vertex_processor = ScanVertexProcessor(meta_client)
def get_return_cols(space):
    """Build the return-column maps for every tag and edge type in ``space``.

    Every tag name and every edge name reported by the module-level
    ``meta_client`` for ``space`` is mapped to an empty list.

    Args:
        space: name of the graph space to look up.

    Returns:
        A ``(vertex_return_cols, edge_return_cols)`` tuple of dicts, keyed by
        tag name and edge name respectively.
    """
    tag_cols = {name: [] for name in meta_client.get_tags_name(space)}
    edge_cols = {name: [] for name in meta_client.get_edges_name(space)}
    return tag_cols, edge_cols
def process_vertex(space, scan_vertex_response):
    """Decode one scan response and print each vertex row's properties.

    The response is decoded by the module-level ``scan_vertex_processor``;
    for every decoded row a ``{property name: property value}`` dict is
    printed.

    Args:
        space: name of the graph space the response was scanned from.
        scan_vertex_response: response object handed unchanged to
            ``scan_vertex_processor.process``.

    Returns:
        None in all cases; the decoded rows are only printed.
    """
    result = scan_vertex_processor.process(space, scan_vertex_response)
    if result is None:
        return None
    # result._rows is keyed by tag name; only the rows are needed here.
    for tag_rows in result._rows.values():
        for row in tag_rows:
            props = {prop.get_name(): prop.get_value() for prop in row._properties}
            print(props)
# Graph space whose vertices are scanned.
space_to_read = "test"
# Every tag and edge type of the space, each mapped to an empty column list.
vertex_return_cols, edge_return_cols = get_return_cols(space_to_read)
# scan_vertex(space, return_cols, all_cols) is not defined in this write-up;
# it comes from the referenced StorageClientExample.py.
scan_vertex(space_to_read, vertex_return_cols, all_cols=True)
Now we can see that the vertex is scanned:
In [38]: scan_vertex(space_to_read, vertex_return_cols, True)
...:
############# scanned vertex data #############
{'name': 'cs', 'credits': 12}
Reproduce issue
Now let's alter schema:
USE test;
ALTER TAG course ADD (address string);
Now the schema version of the stored data no longer matches the latest schema version in metad, and the scan fails:
In [39]: scan_vertex(space_to_read, vertex_return_cols, True)
############# scanned vertex data #############
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[39], line 1
----> 1 scan_vertex(space_to_read, vertex_return_cols, True)
Cell In[3], line 9, in scan_vertex(space, return_cols, all_cols)
7 logging.error("Error occurs while scaning vertex")
8 break
----> 9 process_vertex(space, scan_vertex_response)
Cell In[37], line 2, in process_vertex(space, scan_vertex_response)
1 def process_vertex(space, scan_vertex_response):
----> 2 result = scan_vertex_processor.process(space, scan_vertex_response)
3 if result is None:
4 return None
File ~/.local/lib/python3.8/site-packages/nebula/ngStorage/ngProcessor/ScanVertexProcessor.py:60, in ScanVertexProcessor.process(self, space_name, scan_vertex_response)
58 tag_name = tag_id_name_map[tag_id]
59 default_properties = row_reader.vertex_key(scan_tag.vertexId, tag_name)
---> 60 properties = row_reader.decode_value(scan_tag.value)
61 rows[tag_name].append(Row(default_properties, properties))
62 else:
File ~/.local/lib/python3.8/site-packages/nebula/ngData/data.py:105, in RowReader.decode_value(self, value, schema_version)
103 properties.append(self.get_double_property(field, value))
104 elif property_type == SupportedType.STRING:
--> 105 properties.append(self.get_string_property(field, value))
106 elif property_type == SupportedType.TIMESTAMP:
107 properties.append(self.get_timestamp_property(field, value))
File ~/.local/lib/python3.8/site-packages/nebula/ngData/data.py:167, in RowReader.get_string_property(self, name, value)
165 val = str(value[self._offset:self._offset+strLen])
166 else:
--> 167 val = str(value[self._offset:self._offset+strLen], encoding='utf-8')
168 self._offset += strLen
169 return Property(SupportedType.STRING, name, val)
TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'
Instead, let's try with db_dump:
# Download the 1.2.1 rpm and unpack it into the current directory to get the db_dump binary
wget https://github.com/vesoft-inc/nebula/releases/download/v1.2.1/nebula-1.2.1.el7-5.x86_64.rpm
rpm2cpio nebula-1.2.1.el7-5.x86_64.rpm | cpio -idmv
# Copy db_dump into the storaged1 container and open a shell there
docker cp usr/local/nebula/bin/db_dump nebula-docker-compose_storaged1_1:/usr/local/nebula/bin
docker exec -it nebula-docker-compose_storaged1_1 sh
# in storaged1
./db_dump --space=test --db_path=/data/storage/nebula/ --meta_server=172.28.1.3:45500
And we can scan the data:
sh-4.2# ./db_dump --space=test --db_path=/data/storage/nebula/ --meta_server=172.28.1.3:45500
===========================PARAMS============================
mode: scan
meta server: 172.28.1.3:45500
space: test
path: /data/storage/nebula/
parts:
vids:
tags:
edges:
limit: 1000
===========================PARAMS============================
[vertex] key: 1, 100, course value: cs, 12,
===========================STATISTICS============================
COUNT: 1
VERTEX COUNT: 1
EDGE COUNT: 0
TAG STATISTICS:
course : 1
EDGE STATISTICS:
============================STATISTICS===========================
Time cost: 77 us
Add --limit=0 to scan all data instead of stopping at the default limit of 1000 entries:
sh-4.2# ./db_dump --space=test --db_path=/data/storage/nebula/ --meta_server=172.28.1.3:45500 --limit=0
===========================PARAMS============================
mode: scan
meta server: 172.28.1.3:45500
space: test
path: /data/storage/nebula/
parts:
vids:
tags:
edges:
limit: 0
===========================PARAMS============================
[vertex] key: 1, 100, course value: cs, 12,
===========================STATISTICS============================
COUNT: 1
VERTEX COUNT: 1
EDGE COUNT: 0
TAG STATISTICS:
course : 1
EDGE STATISTICS:
============================STATISTICS===========================
Time cost: 102 us
db_dump ref: https://docs.nebula-graph.com.cn/1.2.0/manual-CN/3.build-develop-and-administration/5.storage-service-administration/data-export/dump-tool/