From a4780521940a77fca088e01a9aa574b16ce019de Mon Sep 17 00:00:00 2001 From: Florian Zirker Date: Fri, 23 Feb 2024 18:20:41 +0100 Subject: [PATCH] paperless: move script to this git --- .editorconfig | 6 ++ .gitignore | 3 +- paperless/docker-compose.yaml | 2 +- paperless/scripts/removePdfPassword.py | 79 ++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 paperless/scripts/removePdfPassword.py diff --git a/.editorconfig b/.editorconfig index ec07101..b7a329e 100644 --- a/.editorconfig +++ b/.editorconfig @@ -16,3 +16,9 @@ tab_width = 2 [*.md] trim_trailing_whitespace = false + +[*.py] +end_of_line = lf +indent_style = space +indent_size = 4 +tab_width = 4 diff --git a/.gitignore b/.gitignore index 8682eff..ba238b6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .env -**/.vscode/* \ No newline at end of file +**/.vscode/* +paperless/scripts/passwords.txt diff --git a/paperless/docker-compose.yaml b/paperless/docker-compose.yaml index 9e77420..53c1caf 100644 --- a/paperless/docker-compose.yaml +++ b/paperless/docker-compose.yaml @@ -40,7 +40,7 @@ services: retries: 5 volumes: - ${VOLUMES_PATH}/paperless/data:/usr/src/paperless/data - - ${VOLUMES_PATH}/paperless/scripts:/usr/src/paperless/scripts + - ./scripts:/usr/src/paperless/scripts - ${MEDIA_PATH}:/usr/src/paperless/media - ${EXPORT_PATH}:/usr/src/paperless/export - ${CONSUME_PATH}:/usr/src/paperless/consume diff --git a/paperless/scripts/removePdfPassword.py b/paperless/scripts/removePdfPassword.py new file mode 100644 index 0000000..4931931 --- /dev/null +++ b/paperless/scripts/removePdfPassword.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python + +"""Script for paperless-ngx to decrypt pdf files secured with a password +Passwords to check are in file /usr/src/paperless/scripts/passwords.txt +Original from https://piep.tech/posts/automatic-password-removal-in-paperless-ngx/ +""" + +import sys +import os +import pikepdf + + +PWD_PATH = 
"/usr/src/paperless/scripts/passwords.txt"
# ^ Value of PWD_PATH (the assignment starts on the preceding line): the text
#   file holding one candidate password per line.
# PWD_PATH = "input/passwords.txt"  # alternative path for local testing


def read_passwords_from_file(path=None):
    """Read the candidate passwords, one per line, from a text file.

    Args:
        path: File to read. When omitted, the module-level ``PWD_PATH`` is
            used.

    Returns:
        The non-empty lines of the file, stripped of surrounding whitespace,
        in file order. An empty list is returned (after printing a notice)
        when the file cannot be read or contains no passwords.
    """
    if path is None:
        path = PWD_PATH

    try:
        with open(path, "r", encoding="utf-8") as f:
            stripped = [line.strip() for line in f]
    except OSError as err:
        # A missing/unreadable password file must not crash the script with
        # a traceback; the caller simply gets no passwords to try.
        print(f"Password file {path} could not be read: {err}")
        return []

    passwords = [pwd for pwd in stripped if pwd]
    if not passwords:
        print("Empty password file")

    return passwords


def unlock_pdf(file_path, password_list):
    """Remove the password protection from a PDF, overwriting it in place.

    Every password in ``password_list`` is tried in order until one opens
    the document.

    Args:
        file_path: Path of the PDF to decrypt.
        password_list: Iterable of candidate passwords.

    Returns:
        True if one of the passwords opened the file and it was saved back
        to ``file_path``; False if none of them matched.
    """
    for pwd in password_list:
        try:
            # allow_overwriting_input lets us save to the very file we opened.
            with pikepdf.open(file_path, password=pwd, allow_overwriting_input=True) as pdf:
                pdf.save(file_path)
        except pikepdf.PasswordError:
            continue
        return True
    return False


def is_pdf_encrypted(file_path):
    """Return True if the PDF at ``file_path`` is encrypted.

    A file that cannot be opened without a password (``PasswordError``) is
    reported as encrypted as well.
    """
    try:
        with pikepdf.open(file_path) as pdf:
            return pdf.is_encrypted
    except pikepdf.PasswordError:
        return True


def is_pdf(file_path):
    """Return True if the file name ends in ``.pdf`` (case-insensitive).

    Only the name is inspected; the file is not opened.
    """
    return os.path.splitext(file_path)[-1].lower() == ".pdf"


def main():
    """Decrypt the document named by ``DOCUMENT_WORKING_PATH`` if necessary.

    Returns:
        The process exit status: 0 when the document needs no decryption or
        was decrypted successfully, 1 when the environment variable is not
        set, the document does not exist, or no password matched.
    """
    doc_path = os.environ.get('DOCUMENT_WORKING_PATH')

    # os.path.exists(None) raises TypeError, so handle the unset variable
    # explicitly instead of crashing.
    if not doc_path:
        print("Environment variable DOCUMENT_WORKING_PATH is not set.")
        return 1

    if not os.path.exists(doc_path):
        print(f"Document {doc_path} not found.")
        return 1

    if not is_pdf(doc_path):
        print(f"Document {doc_path} is not a PDF. So no decryption needed.")
        return 0

    if not is_pdf_encrypted(doc_path):
        print(f"Document {doc_path} was already decrypted and no password needed.")
        return 0

    pwds = read_passwords_from_file()
    if unlock_pdf(doc_path, pwds):
        print(f"Document {doc_path} successfull decrypted.")
        return 0

    print(f"Document {doc_path} could not be decrypted.")
    return 1


if __name__ == "__main__":
    sys.exit(main())