Python code: extract PDF metadata

发布时间: 2023-09-02 16:28:33　作者: ChrainY

The file name is extract_pdf_metadata.py.

import PyPDF2
import sys

def extract_academic_metadata(pdf_file):
    """Print a PDF's ``/Subject`` metadata entry, labelled as the journal.

    The value is written to stdout as ``Journal: <value>``. ``N/A`` is
    printed when the document carries no metadata at all or has no
    ``/Subject`` entry. Failures (missing file, unreadable PDF, ...) are
    not raised; they are reported on stdout as ``Error: <message>``.

    Args:
        pdf_file: Path of the PDF file to inspect.

    Returns:
        None. The result is only printed.
    """
    try:
        # The ``with`` block guarantees the file handle is closed, even when
        # PyPDF2 fails while parsing the document.
        with open(pdf_file, 'rb') as pdf_stream:
            metadata = PyPDF2.PdfReader(pdf_stream).metadata

            # ``metadata`` is None when the PDF has no document information
            # dictionary; treat that the same as a missing ``/Subject`` key.
            # Tip: iterate ``metadata.items()`` to see which keys a PDF has.
            if metadata is None:
                publisher_name = 'N/A'
            else:
                publisher_name = metadata.get('/Subject', 'N/A')

            # You can add more custom metadata fields as needed
            print(f'Journal: {publisher_name}')
    except Exception as e:
        # Deliberately broad: this is the script's reporting boundary, so any
        # I/O or parsing problem becomes a one-line message, not a traceback.
        print(f'Error: {e}')

if __name__ == "__main__":
    # Exactly one command-line argument is expected: the PDF path.
    if len(sys.argv) != 2:
        # Passing a string to sys.exit() writes it to stderr and exits with
        # status 1, so shell callers can detect the usage error.
        sys.exit("Usage: python extract_pdf_metadata.py <pdf_file>")
    extract_academic_metadata(sys.argv[1])