Skip to content

Instantly share code, notes, and snippets.

@aqzlpm11
aqzlpm11 / pdf.py
Created June 25, 2018 13:52
pdf 重排
#!/usr/bin/env python
import sys
try:
from PyPDF2 import PdfFileReader, PdfFileWriter
except ImportError:
from pyPdf import PdfFileReader, PdfFileWriter
import os
def concatenate(input_files, output_file):
input_streams = []
@aqzlpm11
aqzlpm11 / hive-import-csv-demo.py
Created July 18, 2018 08:22
Hive import csv demo
from pyhive import hive# or import hive
from pyhdfs import HdfsClient
data_csv = '/root/demo/data/train.csv'
hdfs = HdfsClient('hadoopmaster.hadoop.test', user_name='admin')
hdfs.copy_from_local(data_csv, '/user/admin/data/titanic/train.csv')
hive_cursor= hive.connect('hadoopslave1.hadoop.test', username='admin').cursor()
@aqzlpm11
aqzlpm11 / add_pdf_bookmarks.py
Last active October 6, 2018 16:39
cnki add pdf bookmarks
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Origin: https://github.com/RussellLuo/pdfbookmarker
import sys
import os
import re
from PyPDF2 import PdfFileMerger, PdfFileReader
def addBookmarks(pdf_in_filename, bookmarks_tree, pdf_out_filename=None):
@aqzlpm11
aqzlpm11 / to_mp4.sh
Created March 2, 2019 07:14
ffmpeg 压缩视频 微信
# Re-encode a .mov recording into a smaller H.264/MP3 .mp4 file.
ffmpeg -i \#Architecture.mov -ss 00:00:01 -vcodec libx264 -preset fast -crf 23 -vf "scale=640:-1" -acodec libmp3lame -ab 128k architecure.mp4
# -ss 00:00:01        start the output at 00:00:01 (skip the first second)
# -crf 23             compression level (larger number -> smaller file)
# -vf "scale=640:-1"  resize to 640 px wide; -1 lets ffmpeg pick the height that keeps the aspect ratio
@aqzlpm11
aqzlpm11 / ahk_record.ahk
Created June 4, 2020 11:01
Recording script for AHK
;-----------------------------------
; Macro Recorder v2.1 By FeiYue
;
; Description: This script records the mouse
; and keyboard actions and then plays back.
;
; F1 --> Record(Screen) (CoordMode, Mouse, Screen)
; F2 --> Record(Window) (CoordMode, Mouse, Window)
; F3 --> Stop Record/Play
; F4 --> Play LogFile
@aqzlpm11
aqzlpm11 / image_to_pdf.py
Created June 11, 2020 15:11
images to pdf
from PIL import Image
import os
# Merge every PNG in the 'abc' directory into a single multi-page 'output.pdf'.
# File names are sorted so that the page order is deterministic.
png_names = sorted(name for name in os.listdir('abc') if name.endswith(".png"))
if not png_names:
    # Without this guard the save below fails with an opaque IndexError.
    raise SystemExit("No .png files found in 'abc'; nothing to write.")
# Each image is converted to RGB before being written, as in the original script.
im_list = [Image.open(os.path.join('abc', name)).convert('RGB') for name in png_names]
# The first image drives the save call; the remaining ones are appended as extra pages.
im_list[0].save('output.pdf', "PDF", resolution=100.0, save_all=True, append_images=im_list[1:])
@aqzlpm11
aqzlpm11 / image_save_server.py
Created June 11, 2020 15:12
save base64 image from browser post request
"""
// js code
function di(page) {
var canName = "page_" + page;
var canvas = document.getElementById(canName);
var dataUrl = canvas.toDataURL();
//console.log(dataUrl)
$.post('http://localhost:19290/'+page+'.png', dataUrl)
}
for (let i = 69; i <= 75; i++) {
@aqzlpm11
aqzlpm11 / remove_ad_for_sao_miao_quan_neng_wang.py
Created August 26, 2020 12:07
去除 扫描全能王 pdf 的广告二维码
import sys
import img2pdf
# =========== 总流程 ============
# pdf --> jpg (抽取内部原有的jpg)
# 过滤掉不要的图片
# jpg --> pdf
# ===============================
def extract_all_jpg_from_pdf(pdf_file):
res = []
@aqzlpm11
aqzlpm11 / stat_wavs.py
Created January 21, 2021 06:35
统计语音时长
from pathlib import Path
import soundfile as sf
from tqdm import tqdm
def get_info(audio_file_list):
res = []
for wav in tqdm(audio_file_list):
if not Path(wav).exists():
print(f"Warning: File not exists: {wav}")
continue
@aqzlpm11
aqzlpm11 / zotero_clean_extra.js
Created February 25, 2021 05:28
批量清除zotero中extra字段(当前清除包含citations的行)
// Reference: https://forums.zotero.org/discussion/82558/rdf-extra-field
var fieldName = "extra";
var fieldID = Zotero.ItemFields.getID(fieldName);
var s = new Zotero.Search();
s.libraryID = Zotero.Libraries.userLibraryID;
s.addCondition(fieldName, 'contains', ': ');
s.addCondition('joinMode', 'any');
var ids = await s.search();