Created
May 14, 2022 03:59
-
-
Save YasserGersy/3ee2d33599bb31a35cab6c23c52ee505 to your computer and use it in GitHub Desktop.
Python script to search for specific strings in big files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/python -e | |
import sys,os,glob,time | |
# Running tally of errors; other parts of this module increment it.
errors_count = 0

# Upper bound for numeric ids -- presumably the largest Facebook id considered
# valid (check_fb_id's message mentions "max fbid"); TODO confirm with author.
max_id = 100_011_265_576_762

# When True, dprint() echoes its argument; otherwise it stays silent.
debug = False

# Wall-clock timestamp taken at import time; GetSecondsPassed() measures from it.
start = time.time()
def GetSecondsPassed():
    """Return the whole seconds elapsed since the module-level ``start`` time.

    The difference is truncated toward zero with ``int``.
    """
    # ``start`` is only read here, so no ``global`` declaration is required.
    return int(time.time() - start)
def _contains_(line,term): | |
if line.lower().find(term)>=0: | |
return True | |
return False | |
def _contains_any(string,substring_list): | |
return any(substring in string for substring in substring_list) | |
def _contains_all(string,substring_list): | |
return all(substring in string for substring in substring_list) | |
def min(s, m):
    """Clamp ``s`` from below: return ``m`` when ``s < m``, otherwise ``s``.

    NOTE: despite its name (which shadows the builtin ``min`` for this
    module), this returns the LARGER of the two values. It is used to keep
    divisors and counters at a floor of ``m``.
    """
    return m if s < m else s
def dprint(s):
    """Print ``s`` only while the module-level ``debug`` flag is truthy."""
    if not debug:
        return
    print(s)
def listfiles(PATH, rext="*.*"):
    """Recursively collect the paths of files found under ``PATH``.

    ``rext`` is compared verbatim with the extension reported by
    ``os.path.splitext`` (for example ``".txt"``). The default ``"*.*"``
    is a sentinel meaning "accept every file".
    """
    take_everything = rext == "*.*"
    found = []
    for folder, _subdirs, names in os.walk(PATH):
        for name in names:
            if take_everything or os.path.splitext(name)[1] == rext:
                found.append(os.path.join(folder, name))
    return found
def num_lines(p):
    """Count the lines of the text file at path ``p``.

    Returns the number of lines, or ``-1`` when the file cannot be opened or
    decoded. Every failure also increments the module-level ``errors_count``.
    """
    # Without this declaration the assignment below would make the name local
    # and the error path itself would crash with UnboundLocalError.
    global errors_count
    try:
        with open(p) as handle:
            return sum(1 for _ in handle)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: undecodable bytes
        # (UnicodeDecodeError) or an invalid path such as an embedded NUL.
        errors_count = globals().get("errors_count", 0) + 1
        return -1
def Readable_num(s):
    """Format ``s`` with a space between every group of three digits.

    Example: ``1123456`` -> ``"1 123 456"``. Grouping is done from the right
    on ``str(s)``. A leading ``-`` or ``+`` is kept attached to the first
    group, and no leading space is produced when the digit count is a
    multiple of three.
    """
    text = str(s)
    sign = ""
    if text[:1] in ("-", "+"):
        sign, text = text[0], text[1:]
    # Slice three characters at a time starting from the right-hand end.
    groups = [text[max(end - 3, 0):end] for end in range(len(text), 0, -3)]
    return sign + " ".join(reversed(groups))
def printr(s, r):
    """Print ``s`` terminated by ``r`` instead of the default newline.

    Passing a carriage return as ``r`` lets the next print overwrite the
    same terminal line.
    """
    print(s, end=r)
def check_fb_id(i1, i2):
    """Warn when the numeric id ``i1`` is larger than the limit ``i2``.

    Both arguments are converted with ``int(str(...))``. If either is not an
    integer the check is skipped silently. Prints a message and returns True
    when ``i1`` exceeds ``i2``; returns False otherwise.
    """
    try:
        val1 = int(str(i1))
        # The limit must be parsed from i2; reading i1 twice made the
        # comparison below impossible to satisfy.
        val2 = int(str(i2))
    except ValueError:
        return False
    if val1 > val2:
        print(str(val1) + " is bigger than max fbid " + str(val2))
        return True
    return False
def _load_terms(userinput):
    """Return the unique, non-empty search terms described by ``userinput``.

    When ``userinput`` names an existing file, one term is read per line;
    otherwise it is treated as a comma-separated list. Terms are stripped of
    surrounding whitespace and de-duplicated, keeping first-seen order.
    """
    if os.path.isfile(userinput):
        with open(userinput, encoding="utf-8") as handle:
            raw_terms = handle.read().splitlines()
    else:
        raw_terms = userinput.split(",")
    stripped = (term.strip() for term in raw_terms)
    return list(dict.fromkeys(term for term in stripped if term))


def _progress_message(count, total):
    """Build the one-line progress text shown for a non-matching line."""
    overall = " All:" + Readable_num(total)
    # max(..., 1) keeps the division safe during the first second of the run.
    speed = " [speed:" + str(int(max(total, 1) / max(GetSecondsPassed(), 1))) + " Per second ]"
    return " [+] Searching line :" + Readable_num(count) + ("" if count == total else overall) + speed


def main():
    """Search one file, or every file under a directory, for the given terms.

    Command line: ``<script> <file-or-directory> <term1,term2 | terms-file>``.
    Matching is case-insensitive. A line that contains any term is printed
    and appended to ``output.txt`` in the current directory. Files that
    cannot be opened or decoded as UTF-8 are reported and skipped.

    Exits with status 1 on a usage error, an unreadable terms file, an empty
    term list, or a path that is neither a file nor a directory.
    """
    print("--------------Reader v1.1-----------------------")
    if len(sys.argv) < 3:
        script = sys.argv[0] if sys.argv else "reader.py"
        print("Usage: python " + str(script) + " <file-or-directory> <term1,term2 | terms-file>")
        sys.exit(1)

    path = sys.argv[1]
    userinput = sys.argv[2]
    try:
        terms = _load_terms(userinput)
    except (OSError, UnicodeError) as ex:
        print("[-] could not read terms from [" + userinput + "]: " + str(ex))
        sys.exit(1)
    if not terms:
        print("[-] plz provide at least one search term ")
        sys.exit(1)

    errors_count = 0
    if os.path.isfile(path):
        files = [path]
        print("[+] Looking in single path [" + path + "]")
    elif os.path.isdir(path):
        files = listfiles(path, "*.*")
        print("[+] Looking for {" + str(terms) + "} in " + str(len(files)) + " Files in Directory: [" + path + "] " + " E:" + str(errors_count))
    else:
        print("[-] plz provide an existing file or directory ")
        sys.exit(1)

    # Numeric terms are checked against the configured id ceiling.
    for term in terms:
        check_fb_id(term, max_id)
    dprint(files)
    dprint("\n\n")

    # Lowercase once so each line needs a single lower() and plain `in` tests.
    needles = [term.lower() for term in terms]
    count = 0
    all_Files_lines_c = 0
    output = None  # opened lazily: output.txt is only touched on a match
    try:
        for fl in files:
            count = 0
            print(" [+] Search for '" + str(terms) + "' at:'" + fl + "' ")
            try:
                with open(fl, "r", encoding="utf-8") as file1:
                    # One pass per file, testing every term on each line;
                    # re-reading per term would hit EOF after the first term.
                    for line in file1:
                        count += 1
                        all_Files_lines_c += 1
                        dprint(line)
                        lowered = line.lower()
                        try:
                            if any(needle in lowered for needle in needles):
                                ol = "\t [+] " + line
                                print(ol)
                                if output is None:
                                    output = open("output.txt", "a", encoding="utf-8")
                                output.write(ol.strip() + "\n")
                            else:
                                printr(_progress_message(count, all_Files_lines_c), "\r")
                        except (OSError, UnicodeError) as ex:
                            # Reporting a line failed (console encoding or
                            # output.txt); keep scanning the remaining lines.
                            print("Error135:\n" + str(ex))
                            errors_count += 1
                dprint("~~~end of file-------------------------")
            except (OSError, UnicodeError) as ex:
                # Unopenable file or undecodable bytes: give up on this file
                # only and carry on with the next one.
                errors_count += 1
                print(" [-] Skipping rest of '" + fl + "': " + str(ex))
            dprint("Closing file ")
    finally:
        if output is not None:
            output.close()

    print("\n------------End Search--------")
    print("------------Count:" + str(count) + "--------")
    if errors_count:
        print("------------Errors:" + str(errors_count) + "--------")
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl+C is recorded in the module-level error tally, but the process
        # still exits with status 0.
        errors_count += 1
        print("\n\nCancelled")
        sys.exit(0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment