#!/usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example: filter scanned entries with RegExFilter iterators.

The script (re)creates the ``regexes`` table, writes every line of the
``LICENSE`` file into it (row = 1-based line number, column family ``e``,
value = the stripped line), then batch-scans the table with three
``RegExFilter`` iterators attached and prints each entry that comes back.

Connection parameters are read from the ``settings`` module.
"""

from pyaccumulo import Accumulo, Mutation, Range
from pyaccumulo.iterators import *
from pyaccumulo.proxy.ttypes import IteratorSetting, IteratorScope
from examples.util import hashcode
import hashlib
import re

import settings

conn = Accumulo(
    host=settings.HOST,
    port=settings.PORT,
    user=settings.USER,
    password=settings.PASSWORD,
)

# Everything that uses the connection lives inside try/finally so the
# connection is released even if table setup, ingest or the scan fails.
try:
    table = "regexes"

    # Start from an empty table on every run.
    if conn.table_exists(table):
        conn.delete_table(table)
    conn.create_table(table)

    wr = conn.create_batch_writer(table)
    license_file = "LICENSE"
    try:
        # open() instead of the Python-2-only file() builtin.
        with open(license_file) as infile:
            # One mutation per line, keyed by its 1-based line number.
            for linenum, line in enumerate(infile, 1):
                m = Mutation(str(linenum))
                m.put(cf="e", cq="", val=line.strip())
                wr.add_mutation(m)
    finally:
        # Close the writer on the error path too, not only on success.
        wr.close()

    # Three value filters with distinct priorities and names.
    # NOTE(review): presumably the iterators are stacked, so an entry must
    # satisfy all three patterns to be returned -- confirm against the
    # Accumulo iterator documentation.
    regex1 = RegExFilter(priority=21, val_regex=".*stated.*",
                         match_substring=True, name="RegExFilter1")
    regex2 = RegExFilter(priority=22, val_regex='.*patent',
                         match_substring=True, name="RegExFilter2")
    regex3 = RegExFilter(priority=23, val_regex='have made',
                         match_substring=True, name="RegExFilter3")

    for e in conn.batch_scan(table, cols=[["e"]],
                             iterators=[regex1, regex2, regex3]):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(e)
finally:
    conn.close()