87 lines
2.4 KiB
Python
Executable file
87 lines
2.4 KiB
Python
Executable file
#!/usr/bin/env python
|
|
|
|
"""Script for paperless-ngx to decrypt PDF files secured with a password
|
|
Passwords to check are in file /usr/src/paperless/scripts/passwords.txt
|
|
Original from https://piep.tech/posts/automatic-password-removal-in-paperless-ngx/
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
import pikepdf
|
|
|
|
|
|
PWD_PATH = "/usr/src/paperless/scripts/passwords.txt"
|
|
#PWD_PATH = "input/passwords.txt"
|
|
|
|
|
|
def read_passwords_from_file(path=None):
    """Read the candidate passwords, one per line, from a text file.

    Surrounding whitespace is stripped from every line, blank lines are
    skipped and duplicates are dropped while keeping the file order. The
    empty password is always appended as the last candidate because some
    PDFs are encrypted with an empty password.

    Args:
        path: Password file to read. Defaults to the module-level PWD_PATH.

    Returns:
        A list of password strings that always ends with "".
    """
    if path is None:
        path = PWD_PATH

    try:
        with open(path, "r", encoding="utf-8") as f:
            stripped = [line.strip() for line in f]
    except FileNotFoundError:
        # A missing file must not prevent the empty-password attempt.
        print(f"Password file {path} not found")
        return [""]

    # dict.fromkeys drops duplicates but keeps the first-seen order, so no
    # password is tried against the PDF more than once.
    passwords = list(dict.fromkeys(pwd for pwd in stripped if pwd))
    if not passwords:
        print("Empty password file")

    passwords.append("")  # some PDFs are encrypted with empty password
    return passwords
|
|
|
|
|
|
def unlock_pdf(file_path, password_list):
    """Try each password in turn and rewrite the PDF in place once one works.

    Returns True as soon as the file could be opened and saved with one of
    the candidates, and False when every candidate was rejected.
    """
    for candidate in password_list:
        try:
            with pikepdf.open(
                file_path,
                password=candidate,
                allow_overwriting_input=True,
            ) as document:
                # Save over the input file using the opened document.
                document.save(file_path)
        except pikepdf.PasswordError:
            # Wrong password: move on to the next candidate.
            continue
        return True
    return False
|
|
|
|
|
|
def is_pdf_encrypted(file_path):
    """Report whether the PDF at file_path is encrypted.

    The file is opened without a password. If pikepdf rejects that with a
    PasswordError the file is reported as encrypted; otherwise the opened
    document's own is_encrypted flag is returned.
    """
    try:
        with pikepdf.open(file_path) as document:
            encrypted = document.is_encrypted
    except pikepdf.PasswordError:
        encrypted = True
    return encrypted
|
|
|
|
|
|
def is_pdf(file_path):
    """Tell whether the file name carries a ".pdf" extension in any letter case.

    Only the name is inspected; the file itself is never opened.
    """
    _, extension = os.path.splitext(file_path)
    return ".pdf" == extension.lower()
|
|
|
|
|
|
def main(argv=None):
    """Decrypt the document named by the environment or the command line.

    The path is taken from the DOCUMENT_WORKING_PATH environment variable
    and, when that is unset or empty, from the first command-line argument.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            sys.argv[1:].

    Returns:
        The process exit status: 0 when the document is not a PDF, is not
        encrypted, or was decrypted; 1 when no path was given, the file does
        not exist, or none of the passwords worked.
    """
    if argv is None:
        argv = sys.argv[1:]

    doc_path = os.environ.get('DOCUMENT_WORKING_PATH')

    # Only fall back to argv when it actually holds an argument. Indexing
    # sys.argv[1] unconditionally raised IndexError and made the message
    # below unreachable.
    if not doc_path and argv:
        doc_path = argv[0]

    if not doc_path:
        print("Neither DOCUMENT_WORKING_PATH set nor document passed by parameter.")
        return 1

    if not os.path.exists(doc_path):
        print(f"Document {doc_path} not found.")
        return 1

    if not is_pdf(doc_path):
        print(f"Document {doc_path} is not a PDF. So no decryption needed.")
        return 0

    if not is_pdf_encrypted(doc_path):
        print(f"Document {doc_path} was already decrypted and no password needed.")
        return 0

    pwds = read_passwords_from_file()
    if unlock_pdf(doc_path, pwds):
        print(f"Document {doc_path} successfull decrypted.")
        return 0

    print(f"Document {doc_path} could not be decrypted.")
    return 1


if __name__ == "__main__":
    sys.exit(main())
|