paperless: move script to this git
This commit is contained in:
parent
c70b5f90d1
commit
a478052194
4 changed files with 88 additions and 2 deletions
|
@ -16,3 +16,9 @@ tab_width = 2
|
|||
|
||||
[*.md]
|
||||
trim_trailing_whitespace = false
|
||||
|
||||
[*.py]
|
||||
end_of_line = lf
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
tab_width = 4
|
||||
|
|
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -1,2 +1,3 @@
|
|||
.env
|
||||
**/.vscode/*
|
||||
**/.vscode/*
|
||||
paperless/scripts/passwords.txt
|
||||
|
|
|
@ -40,7 +40,7 @@ services:
|
|||
retries: 5
|
||||
volumes:
|
||||
- ${VOLUMES_PATH}/paperless/data:/usr/src/paperless/data
|
||||
- ${VOLUMES_PATH}/paperless/scripts:/usr/src/paperless/scripts
|
||||
- ./scripts:/usr/src/paperless/scripts
|
||||
- ${MEDIA_PATH}:/usr/src/paperless/media
|
||||
- ${EXPORT_PATH}:/usr/src/paperless/export
|
||||
- ${CONSUME_PATH}:/usr/src/paperless/consume
|
||||
|
|
79
paperless/scripts/removePdfPassword.py
Normal file
79
paperless/scripts/removePdfPassword.py
Normal file
|
@ -0,0 +1,79 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
"""Script for papeless-ngx to decrypt pdf files secured with a password
|
||||
Passwords to check are in file /usr/src/paperless/scripts/passwords.txt
|
||||
Original from https://piep.tech/posts/automatic-password-removal-in-paperless-ngx/
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import pikepdf
|
||||
|
||||
|
||||
# Location of the password list that read_passwords_from_file() opens;
# the file is read line by line, one candidate password per line.
PWD_PATH = "/usr/src/paperless/scripts/passwords.txt"
|
||||
#PWD_PATH = "input/passwords.txt"
|
||||
|
||||
|
||||
def read_passwords_from_file(path=None):
    """Read the candidate passwords, one per line, from a text file.

    Every line is stripped of surrounding whitespace and blank lines are
    dropped, so a password cannot start or end with whitespace.

    Args:
        path: File to read. When omitted, the module-level ``PWD_PATH``
            is used.

    Returns:
        The non-empty passwords in file order. The list is empty when the
        file contains no usable line or does not exist; a message is
        printed in both cases.
    """
    if path is None:
        path = PWD_PATH

    try:
        with open(path, "r", encoding="utf-8") as f:
            stripped = [line.strip() for line in f]
    except FileNotFoundError:
        # The password file is not tracked in git, so it can legitimately be
        # absent; report it instead of crashing with a traceback.
        print(f"Password file {path} not found")
        return []

    passwords = [pwd for pwd in stripped if pwd]
    if not passwords:
        print("Empty password file")

    return passwords
|
||||
|
||||
|
||||
def unlock_pdf(file_path, password_list):
    """Try each password in turn and save the PDF back over itself.

    The first password that opens the document wins: the document is saved
    to the same ``file_path`` and the function returns immediately.

    Args:
        file_path: Path of the PDF to open and overwrite.
        password_list: Iterable of candidate passwords, tried in order.

    Returns:
        True if one of the passwords opened the file and it was saved,
        False if every candidate raised ``pikepdf.PasswordError``.
    """
    for candidate in password_list:
        try:
            # The input is opened with allow_overwriting_input=True because
            # the result is written to the very same path.
            with pikepdf.open(
                file_path,
                password=candidate,
                allow_overwriting_input=True,
            ) as document:
                document.save(file_path)
                return True
        except pikepdf.PasswordError:
            # Wrong password: move on to the next candidate.
            continue

    return False
|
||||
|
||||
|
||||
def is_pdf_encrypted(file_path):
    """Report whether the PDF at ``file_path`` is encrypted.

    The file is opened without a password. If that succeeds, the document's
    own ``is_encrypted`` flag is returned; if opening raises
    ``pikepdf.PasswordError``, the file is treated as encrypted.
    """
    try:
        with pikepdf.open(file_path) as document:
            encrypted = document.is_encrypted
    except pikepdf.PasswordError:
        encrypted = True

    return encrypted
|
||||
|
||||
|
||||
def is_pdf(file_path):
    """Tell whether the file name carries a ``.pdf`` extension.

    Only the name is inspected (case-insensitively); the file itself is
    never opened.
    """
    extension = os.path.splitext(file_path)[-1]
    return extension.lower() == ".pdf"
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The document to process is taken from the DOCUMENT_WORKING_PATH
    # environment variable.
    # NOTE(review): presumably set by paperless-ngx for pre-consume scripts;
    # confirm against the paperless-ngx documentation.
    doc_path = os.environ.get('DOCUMENT_WORKING_PATH')

    # os.environ.get() yields None when the variable is unset, which would
    # make os.path.exists() raise a TypeError; fail with a clear message.
    if not doc_path:
        print("Environment variable DOCUMENT_WORKING_PATH is not set.")
        sys.exit(1)

    if not os.path.exists(doc_path):
        print(f"Document {doc_path} not found.")
        sys.exit(1)

    # Non-PDF files and unencrypted PDFs need no work: exit successfully.
    if not is_pdf(doc_path):
        print(f"Document {doc_path} is not a PDF. So no decryption needed.")
        sys.exit(0)

    if not is_pdf_encrypted(doc_path):
        print(f"Document {doc_path} was already decrypted and no password needed.")
        sys.exit(0)

    pwds = read_passwords_from_file()
    if unlock_pdf(doc_path, pwds):
        print(f"Document {doc_path} successfull decrypted.")
        sys.exit(0)

    # None of the known passwords opened the file: signal failure.
    print(f"Document {doc_path} could not be decrypted.")
    sys.exit(1)
|
Loading…
Reference in a new issue