homeserver/paperless/scripts/removePdfPassword.py

80 lines
2.1 KiB
Python
Raw Normal View History

2024-02-23 18:20:41 +01:00
#!/usr/bin/env python
"""Script for paperless-ngx to decrypt PDF files secured with a password.
Passwords to check are in the file /usr/src/paperless/scripts/passwords.txt
Original from https://piep.tech/posts/automatic-password-removal-in-paperless-ngx/
"""
import sys
import os
import pikepdf
PWD_PATH = "/usr/src/paperless/scripts/passwords.txt"
#PWD_PATH = "input/passwords.txt"
def read_passwords_from_file(path=None):
    """Read the candidate passwords, one per line, from a text file.

    Args:
        path: File to read. When omitted, the module-level ``PWD_PATH`` is
            used.

    Returns:
        A list of passwords with surrounding whitespace stripped and blank
        lines dropped. The list is empty (and a message is printed) when the
        file contains no passwords or cannot be read.
    """
    if path is None:
        path = PWD_PATH
    try:
        # "utf-8-sig" reads plain UTF-8 as well, but drops a leading
        # byte-order mark that would otherwise become part of the first
        # password (str.strip() does not remove it).
        with open(path, "r", encoding="utf-8-sig") as f:
            stripped = [line.strip() for line in f]
    except OSError as err:
        # An unreadable file is reported like an empty one instead of
        # aborting with a traceback.
        print(f"Could not read password file {path}: {err}")
        return []
    passwords = [pwd for pwd in stripped if pwd]
    if not passwords:
        print("Empty password file")
    return passwords
def unlock_pdf(file_path, password_list):
    """Remove the password protection from a PDF, rewriting it in place.

    The empty password is tried first, then every entry of
    ``password_list`` in order. The first password that opens the file is
    used to save it back to ``file_path``.

    Args:
        file_path: Path of the PDF; it is overwritten on success.
        password_list: Candidate passwords to try.

    Returns:
        True as soon as one password opened the file and it was saved,
        False if none of the candidates was accepted.
    """
    # A PDF that only carries an owner password can be opened with the empty
    # user password, yet it still reports itself as encrypted. Because
    # read_passwords_from_file() discards blank lines, the empty password
    # never appears in the list and has to be tried explicitly.
    for pwd in ["", *password_list]:
        try:
            # allow_overwriting_input permits saving to the file that was opened.
            with pikepdf.open(file_path, password=pwd, allow_overwriting_input=True) as pdf:
                pdf.save(file_path)
            return True
        except pikepdf.PasswordError:
            # Wrong password: move on to the next candidate.
            continue
    return False
def is_pdf_encrypted(file_path):
    """Tell whether the PDF at ``file_path`` is encrypted.

    A file that pikepdf refuses to open without a password counts as
    encrypted; otherwise the opened document's own ``is_encrypted`` flag is
    returned.
    """
    try:
        document = pikepdf.open(file_path)
    except pikepdf.PasswordError:
        # Opening without a password was rejected.
        return True
    with document:
        encrypted = document.is_encrypted
    return encrypted
def is_pdf(file_path):
    """Report whether the file name carries a ``.pdf`` extension.

    The comparison ignores letter case. Only the name is inspected; the file
    itself is never opened.
    """
    _, extension = os.path.splitext(file_path)
    return ".pdf" == extension.lower()
if __name__ == "__main__":
    # The document to process is named by the DOCUMENT_WORKING_PATH
    # environment variable. Exit status: 0 when nothing had to be done or the
    # decryption succeeded, 1 on any failure.
    doc_path = os.environ.get('DOCUMENT_WORKING_PATH')

    # os.path.exists(None) raises TypeError, so an unset (or empty) variable
    # is rejected up front with a readable message.
    if not doc_path:
        print("Environment variable DOCUMENT_WORKING_PATH is not set.")
        sys.exit(1)

    if not os.path.exists(doc_path):
        print(f"Document {doc_path} not found.")
        sys.exit(1)

    # Only the file name is checked here; non-PDF documents pass through.
    if not is_pdf(doc_path):
        print(f"Document {doc_path} is not a PDF. So no decryption needed.")
        sys.exit(0)

    if not is_pdf_encrypted(doc_path):
        print(f"Document {doc_path} was already decrypted and no password needed.")
        sys.exit(0)

    pwds = read_passwords_from_file()
    if unlock_pdf(doc_path, pwds):
        print(f"Document {doc_path} successfull decrypted.")
        sys.exit(0)

    print(f"Document {doc_path} could not be decrypted.")
    sys.exit(1)