paperless: move script to this git

This commit is contained in:
Florian Zirker 2024-02-23 18:20:41 +01:00
parent c70b5f90d1
commit a478052194
4 changed files with 88 additions and 2 deletions

View file

@ -16,3 +16,9 @@ tab_width = 2
[*.md]
trim_trailing_whitespace = false
[*.py]
end_of_line = lf
indent_style = space
indent_size = 4
tab_width = 4

1
.gitignore vendored
View file

@ -1,2 +1,3 @@
.env
**/.vscode/*
paperless/scripts/passwords.txt

View file

@ -40,7 +40,7 @@ services:
retries: 5
volumes:
- ${VOLUMES_PATH}/paperless/data:/usr/src/paperless/data
- ${VOLUMES_PATH}/paperless/scripts:/usr/src/paperless/scripts
- ./scripts:/usr/src/paperless/scripts
- ${MEDIA_PATH}:/usr/src/paperless/media
- ${EXPORT_PATH}:/usr/src/paperless/export
- ${CONSUME_PATH}:/usr/src/paperless/consume

View file

@ -0,0 +1,79 @@
#!/usr/bin/env python
"""Script for paperless-ngx to decrypt PDF files secured with a password.
Passwords to check are in the file /usr/src/paperless/scripts/passwords.txt
Original from https://piep.tech/posts/automatic-password-removal-in-paperless-ngx/
"""
import sys
import os
import pikepdf
PWD_PATH = "/usr/src/paperless/scripts/passwords.txt"
#PWD_PATH = "input/passwords.txt"
def read_passwords_from_file():
    """Load the candidate passwords from the file at PWD_PATH.

    The file is read as UTF-8 with one password per line. Surrounding
    whitespace is stripped from every line and lines that end up empty
    are skipped.

    Returns:
        list[str]: The passwords in file order. The list is empty when
        the file does not exist or holds no usable entries; a notice is
        printed in both cases.
    """
    try:
        with open(PWD_PATH, "r", encoding="utf-8") as pwd_file:
            stripped_lines = (line.strip() for line in pwd_file)
            passwords = [entry for entry in stripped_lines if entry]
    except FileNotFoundError:
        # A missing file is reported like an empty one instead of
        # aborting the script with a traceback.
        print(f"Password file {PWD_PATH} not found")
        return []
    if not passwords:
        print("Empty password file")
    return passwords
def unlock_pdf(file_path, password_list):
    """Strip the password protection from a PDF, rewriting it in place.

    Each entry of ``password_list`` is tried in order. The first one that
    opens the document is used to save it back to ``file_path``.

    Returns:
        bool: True as soon as one password worked and the file was saved,
        False when every candidate was rejected (or the list is empty).
    """
    for candidate in password_list:
        try:
            with pikepdf.open(
                file_path,
                password=candidate,
                allow_overwriting_input=True,
            ) as document:
                document.save(file_path)
        except pikepdf.PasswordError:
            # Wrong password: move on to the next candidate.
            continue
        return True
    return False
def is_pdf_encrypted(file_path):
    """Report whether the PDF at ``file_path`` is encrypted.

    The file is opened without a password. If that succeeds, the
    document's own ``is_encrypted`` flag is returned; if pikepdf rejects
    the open with a PasswordError, the file counts as encrypted.
    """
    try:
        with pikepdf.open(file_path) as document:
            encrypted = document.is_encrypted
    except pikepdf.PasswordError:
        encrypted = True
    return encrypted
def is_pdf(file_path):
    """Tell whether ``file_path`` carries a ``.pdf`` extension.

    Only the name is inspected (case-insensitively); the file itself is
    never opened.
    """
    _, extension = os.path.splitext(file_path)
    return extension.lower() == ".pdf"
def main():
    """Decrypt the document named by DOCUMENT_WORKING_PATH if it needs it.

    The path is read from the DOCUMENT_WORKING_PATH environment variable.
    Files that are not PDFs, or PDFs that open without a password, are
    left untouched.

    Returns:
        int: Exit status for the process. 0 when nothing had to be done
        or the document was decrypted; 1 when the variable is unset, the
        file does not exist, or no password from the list worked.
    """
    doc_path = os.environ.get('DOCUMENT_WORKING_PATH')
    if not doc_path:
        # os.path.exists(None) raises TypeError, so the unset variable
        # has to be handled before touching the filesystem.
        print("Environment variable DOCUMENT_WORKING_PATH is not set.")
        return 1
    if not os.path.exists(doc_path):
        print(f"Document {doc_path} not found.")
        return 1
    if not is_pdf(doc_path):
        print(f"Document {doc_path} is not a PDF. So no decryption needed.")
        return 0
    if not is_pdf_encrypted(doc_path):
        print(f"Document {doc_path} was already decrypted and no password needed.")
        return 0
    pwds = read_passwords_from_file()
    if unlock_pdf(doc_path, pwds):
        print(f"Document {doc_path} successfull decrypted.")
        return 0
    print(f"Document {doc_path} could not be decrypted.")
    return 1


if __name__ == "__main__":
    sys.exit(main())