#!/usr/bin/env python
"""Script for paperless-ngx to decrypt PDF files secured with a password.

Passwords to check are in the file /usr/src/paperless/scripts/passwords.txt

Original from https://piep.tech/posts/automatic-password-removal-in-paperless-ngx/
"""
import os
import sys
from typing import Iterable, List

import pikepdf

# Location of the password list (one candidate password per line).
# For local testing this can be pointed at e.g. "input/passwords.txt".
PWD_PATH = "/usr/src/paperless/scripts/passwords.txt"


def read_passwords_from_file() -> List[str]:
    """Read the candidate passwords from ``PWD_PATH``.

    Each line is one password. Surrounding whitespace is stripped and
    blank lines are dropped.

    Returns:
        The non-empty passwords in file order. When none are found,
        "Empty password file" is printed and an empty list is returned.

    Raises:
        OSError: If the password file cannot be opened.
    """
    with open(PWD_PATH, "r", encoding="utf-8") as f:
        passwords = [pwd for pwd in (line.strip() for line in f) if pwd]
    if not passwords:
        print("Empty password file")
    return passwords


def unlock_pdf(file_path: str, password_list: Iterable[str]) -> bool:
    """Remove the password from a PDF, trying each candidate in turn.

    The first password that opens the file is used; the document is then
    saved back over the original path.

    Args:
        file_path: Path of the PDF to decrypt in place.
        password_list: Candidate passwords, tried in order.

    Returns:
        True if one of the passwords opened the file and it was saved,
        False if every candidate was rejected.
    """
    for pwd in password_list:
        try:
            # allow_overwriting_input is required because the document is
            # saved to the same path it was opened from.
            with pikepdf.open(
                file_path, password=pwd, allow_overwriting_input=True
            ) as pdf:
                pdf.save(file_path)
            return True
        except pikepdf.PasswordError:
            # Wrong password: move on to the next candidate.
            continue
    return False


def is_pdf_encrypted(file_path: str) -> bool:
    """Check whether a PDF file is encrypted.

    The file is opened without a password. A ``pikepdf.PasswordError``
    while opening is treated as "encrypted".
    """
    try:
        with pikepdf.open(file_path) as pdf:
            return pdf.is_encrypted
    except pikepdf.PasswordError:
        return True


def is_pdf(file_path: str) -> bool:
    """Check whether the file name ends in ``.pdf`` (case-insensitive).

    Only the name is inspected; the file is not opened.
    """
    return os.path.splitext(file_path)[-1].lower() == ".pdf"


def main() -> int:
    """Decrypt the document named by ``DOCUMENT_WORKING_PATH`` if needed.

    Returns:
        The process exit status: 0 when the document needs no decryption
        or was decrypted, 1 on any failure.
    """
    doc_path = os.environ.get("DOCUMENT_WORKING_PATH")
    if not doc_path:
        # os.environ.get returns None when the variable is unset; passing
        # None on to os.path.exists would raise TypeError.
        print("Environment variable DOCUMENT_WORKING_PATH is not set.")
        return 1

    if not os.path.exists(doc_path):
        print(f"Document {doc_path} not found.")
        return 1

    if not is_pdf(doc_path):
        print(f"Document {doc_path} is not a PDF. So no decryption needed.")
        return 0

    if not is_pdf_encrypted(doc_path):
        print(f"Document {doc_path} was already decrypted and no password needed.")
        return 0

    try:
        pwds = read_passwords_from_file()
    except OSError as err:
        # Report a missing/unreadable password file instead of a traceback.
        print(f"Password file {PWD_PATH} could not be read: {err}")
        return 1

    if unlock_pdf(doc_path, pwds):
        print(f"Document {doc_path} successfull decrypted.")
        return 0

    print(f"Document {doc_path} could not be decrypted.")
    return 1


if __name__ == "__main__":
    sys.exit(main())