cmungall · June 12, 2019 01:46
diff --git a/make_gdrive_index.py b/make_gdrive_index.py
 #!/usr/bin/env python3
 import os
 import re
 import logging
 import click

 # folder names that are never indexed
 excludes = {'single_files', 'Icon', '.svn', '.git'}

 # one level of indentation in the generated output (for markdown)
 SPC = ' ' * 4

 # regex capturing the https URL from a `"url": "...",` entry of a gdoc JSON index
 pat = re.compile(r'"url": "(https:\S+)",')

 @click.command(help=
                """
                generate index of a synced google drive folder.

                First you need to sync your google drive to disk using "Backup and Sync from Google"

                This will create a folder like "~/Google Drive"

                This script will crawl that folder and make an HTML index that (YMMV) is
                faster to search and navigate than the web interface.

                """
 )
 @click.option('-t', '--outformat', default='html', help='md or html')
 @click.option('-d', '--dir', default= "/Users/cjm/Google Drive", help='location of synced gdrive folder')
 def main(outformat, dir):
    """Walk ``dir`` and print a nested index of its folders and files to stdout.

    Every folder is listed with a Google Drive search link for its name; every
    file is listed with either the URL read from its .gdoc/.gsheet stub or a
    local ``file://`` link.

    Args:
        outformat: ``'html'`` emits nested ``<ul>`` lists; any other value
            emits an indented Markdown bullet list.
        dir: root of the folder tree to index.
    """
    # Local imports: this file defines its own html() function, so the stdlib
    # ``html`` module cannot be imported under its usual name at file level.
    from html import escape
    from urllib.parse import quote

    # A trailing separator (e.g. from shell tab-completion) would otherwise
    # give the root an empty basename and skew the depth of its children.
    top = dir.rstrip(os.sep) or dir
    is_html = outformat == 'html'
    last_lp = -1

    for root, subdirs, files in os.walk(top):
        # Prune excluded folders in place so os.walk never descends into them.
        subdirs[:] = [d for d in subdirs if d.strip('\r\n') not in excludes]

        path = root.split(os.sep)
        pathstr = "/".join(path)
        # anchor name: the path below the root reduced to alphanumerics
        rlink = re.sub('[^0-9a-zA-Z]+', '', pathstr.replace(top, ""))

        # The original stripped '\m\n'; '\m' is not a valid escape sequence and
        # stripped backslash and 'm' characters. Presumably '\r' was intended
        # (trailing carriage returns in names) -- TODO confirm.
        if any(x.strip('\r\n') in excludes for x in path):
            continue

        # Nesting depth relative to the walk root (0 for the root itself).
        rel = os.path.relpath(root, top)
        lp = 0 if rel == os.curdir else rel.count(os.sep) + 1
        bn = os.path.basename(root)
        # URL-encode the folder name so '&', '#', spaces etc. stay in the query.
        url = 'https://drive.google.com/drive/u/0/search?q={}'.format(quote(bn, safe=''))

        # ugly logic for switching between HTML and Markdown.
        # Original idea was to support MD and use pandoc, but pandoc too slow
        if is_html:
            # open or close <ul> levels to get from the previous depth to this one
            while last_lp < lp:
                last_lp += 1
                open_ul(last_lp)
            while last_lp > lp:
                close_ul(last_lp)
                last_lp -= 1
            print('{}<span><li><a name="{}"/><a href="{}">{}</a>[<a href="file://{}/">local</a>] <a href="#{}">[*]</a> <a href="#{}-files">--></a></li></span>\n'.format(
                (lp+1) * SPC, rlink, escape(url), escape(bn), escape(pathstr), rlink, rlink))
            open_ul(lp+1)
        else:
            print('{} * [{}]({})'.format(lp * SPC, bn, url))

        for file in files:
            url = None
            if file.endswith(('.gdoc', '.gsheet')):
                url = _gdoc_url(pathstr + '/' + file)
            if not url:
                # anything without a usable gdoc URL links to the local copy
                url = 'file://{}/{}'.format(pathstr, file)
            if is_html:
                print('{}<li><a href="{}">{}</a></li>\n'.format((lp+2) * SPC, escape(url), escape(file)))
            else:
                print('{}* [{}]({})'.format((lp+1) * SPC, file, url))

        if is_html:
            print('<a name="{}-files"/>'.format(rlink))
            close_ul(lp+1)

        last_lp = lp

    # Levels 0..last_lp were opened while descending and are still open here.
    if is_html:
        while last_lp >= 0:
            close_ul(last_lp)
            last_lp -= 1


 def _gdoc_url(filepath):
    """Return the https URL found on the first line of ``filepath``, or None (logged)."""
    with open(filepath, encoding='utf-8', errors='replace') as s:
        line = s.readline()
    m = pat.search(line)
    if m:
        return m.group(1)
    logging.error("No URL in: %s", line)
    return None

 # Too lazy to use a python HTML lib...
 def open_ul(depth):
    """Print an opening ``<ul>`` tag at nesting level ``depth`` via html()."""
    html(depth=depth, el='ul')
 def close_ul(depth):
    """Print a closing ``</ul>`` tag at nesting level ``depth`` via html()."""
    html(depth=depth, el='/ul')

 def html(el, depth):
    """Print the tag ``<el>`` preceded by ``depth`` copies of SPC."""
    indent = depth * SPC
    print(indent, '<', el, '>', sep='')
            
 # run the click command only when executed as a script
 if __name__ == '__main__':
    main()
	#!/usr/bin/env python3
	import os
	import re
	import logging
	import click

	# don't index these
	excludes = {
	    'single_files',
	    'Icon',
	    '.svn',
	    '.git',
	}

	# indentation (for markdown): four spaces per level, so nested bullets
	# actually nest (this copy had been collapsed to a single space)
	SPC = '    '

	# regex for matching gdoc JSON indices: captures the https URL of a
	# `"url": "...",` entry
	pat = re.compile(r'"url": "(https:\S+)",')

	@click.command(help=
	"""
	generate index of a synced google drive folder.

	First you need to sync your google drive to disk using "Backup and Sync from Google"

	This will create a folder like "~/Google Drive"

	This script will crawl that folder and make an HTML index that (YMMV) is
	faster to search and navigate than the web interface.

	"""
	)
	@click.option('-t', '--outformat', default='html', help='md or html')
	@click.option('-d', '--dir', default= "/Users/cjm/Google Drive", help='location of synced gdrive folder')
	def main(outformat, dir):
	    """Walk ``dir`` and print a nested index of its folders and files to stdout.

	    Every folder is listed with a Google Drive search link for its name; every
	    file is listed with either the URL read from its .gdoc/.gsheet stub or a
	    local ``file://`` link.

	    Args:
	        outformat: ``'html'`` emits nested ``<ul>`` lists; any other value
	            emits an indented Markdown bullet list.
	        dir: root of the folder tree to index.
	    """
	    # Local imports: this file defines its own html() function, so the stdlib
	    # ``html`` module cannot be imported under its usual name at file level.
	    from html import escape
	    from urllib.parse import quote

	    # A trailing separator (e.g. from shell tab-completion) would otherwise
	    # give the root an empty basename and skew the depth of its children.
	    top = dir.rstrip(os.sep) or dir
	    is_html = outformat == 'html'
	    last_lp = -1

	    for root, subdirs, files in os.walk(top):
	        # Prune excluded folders in place so os.walk never descends into them.
	        subdirs[:] = [d for d in subdirs if d.strip('\r\n') not in excludes]

	        path = root.split(os.sep)
	        pathstr = "/".join(path)
	        # anchor name: the path below the root reduced to alphanumerics
	        rlink = re.sub('[^0-9a-zA-Z]+', '', pathstr.replace(top, ""))

	        # The original stripped '\m\n'; '\m' is not a valid escape sequence and
	        # stripped backslash and 'm' characters. Presumably '\r' was intended
	        # (trailing carriage returns in names) -- TODO confirm.
	        if any(x.strip('\r\n') in excludes for x in path):
	            continue

	        # Nesting depth relative to the walk root (0 for the root itself).
	        rel = os.path.relpath(root, top)
	        lp = 0 if rel == os.curdir else rel.count(os.sep) + 1
	        bn = os.path.basename(root)
	        # URL-encode the folder name so '&', '#', spaces etc. stay in the query.
	        url = 'https://drive.google.com/drive/u/0/search?q={}'.format(quote(bn, safe=''))

	        # ugly logic for switching between HTML and Markdown.
	        # Original idea was to support MD and use pandoc, but pandoc too slow
	        if is_html:
	            # open or close <ul> levels to get from the previous depth to this one
	            while last_lp < lp:
	                last_lp += 1
	                open_ul(last_lp)
	            while last_lp > lp:
	                close_ul(last_lp)
	                last_lp -= 1
	            print('{}<span><li><a name="{}"/><a href="{}">{}</a>[<a href="file://{}/">local</a>] <a href="#{}">[*]</a> <a href="#{}-files">--></a></li></span>\n'.format(
	                (lp+1) * SPC, rlink, escape(url), escape(bn), escape(pathstr), rlink, rlink))
	            open_ul(lp+1)
	        else:
	            print('{} * [{}]({})'.format(lp * SPC, bn, url))

	        for file in files:
	            url = None
	            if file.endswith(('.gdoc', '.gsheet')):
	                url = _gdoc_url(pathstr + '/' + file)
	            if not url:
	                # anything without a usable gdoc URL links to the local copy
	                url = 'file://{}/{}'.format(pathstr, file)
	            if is_html:
	                print('{}<li><a href="{}">{}</a></li>\n'.format((lp+2) * SPC, escape(url), escape(file)))
	            else:
	                print('{}* [{}]({})'.format((lp+1) * SPC, file, url))

	        if is_html:
	            print('<a name="{}-files"/>'.format(rlink))
	            close_ul(lp+1)

	        last_lp = lp

	    # Levels 0..last_lp were opened while descending and are still open here.
	    if is_html:
	        while last_lp >= 0:
	            close_ul(last_lp)
	            last_lp -= 1


	def _gdoc_url(filepath):
	    """Return the https URL found on the first line of ``filepath``, or None (logged)."""
	    with open(filepath, encoding='utf-8', errors='replace') as s:
	        line = s.readline()
	    m = pat.search(line)
	    if m:
	        return m.group(1)
	    logging.error("No URL in: %s", line)
	    return None

	# Too lazy to use a python HTML lib...
	def open_ul(depth):
	    """Print an opening ``<ul>`` tag at nesting level ``depth`` via html()."""
	    html('ul', depth)
	def close_ul(depth):
	    """Print a closing ``</ul>`` tag at nesting level ``depth`` via html()."""
	    html('/ul', depth)

	def html(el, depth):
	    """Print the tag ``<el>`` preceded by ``depth`` copies of SPC."""
	    print('{}<{}>'.format(depth * SPC, el))

	# run the click command only when executed as a script
	if __name__ == "__main__":
	    main()