JFK files released + a bonus Python script to download all the .pdf files in bulk
https://www.archives.gov/research/jfk/release-2025
--------
import requests
from bs4 import BeautifulSoup
import os
from urllib.parse import urljoin

def download_pdfs(url, download_folder="pdf_downloads"):
    # Create the output folder if it doesn't exist yet
    if not os.path.exists(download_folder):
        os.makedirs(download_folder)
    try:
        # A browser-like User-Agent; some servers reject bare requests
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        pdf_count = 0
        # Walk every link on the page and keep the ones pointing at a .pdf
        for link in soup.find_all('a', href=True):
            href = link['href']
            absolute_url = urljoin(url, href)  # resolve relative links
            if absolute_url.lower().endswith('.pdf'):
                try:
                    pdf_response = requests.get(absolute_url, headers=headers)
                    pdf_response.raise_for_status()
                    # Use the last path segment as the local filename
                    filename = absolute_url.split('/')[-1]
                    if not filename.lower().endswith('.pdf'):
                        filename += '.pdf'
                    file_path = os.path.join(download_folder, filename)
                    with open(file_path, 'wb') as f:
                        f.write(pdf_response.content)
                    print(f"Downloaded: {filename}")
                    pdf_count += 1
                except requests.RequestException as e:
                    print(f"Failed to download {absolute_url}: {e}")
        print(f"\nTotal PDFs downloaded: {pdf_count}")
        if pdf_count == 0:
            print("No PDF files found on the webpage.")
    except requests.RequestException as e:
        print(f"Error accessing webpage {url}: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

if __name__ == "__main__":
    target_url = "https://www.archives.gov/research/jfk/release-2025"
    download_pdfs(target_url)
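
The release is large, so the script above loads each whole PDF into memory and re-downloads everything on a rerun. One tweak worth considering (not in the original script): stream each file to disk in chunks and skip anything already downloaded, so an interrupted run can resume. A minimal sketch of a helper you could call in place of the inner download step, with `download_pdf` being a hypothetical name for illustration:

import os
import requests

def download_pdf(absolute_url, file_path, headers=None):
    # Skip files that are already on disk so reruns resume instead
    # of re-fetching the whole release.
    if os.path.exists(file_path):
        print(f"Skipping (already downloaded): {os.path.basename(file_path)}")
        return False
    # stream=True avoids holding a large PDF entirely in memory;
    # timeout keeps one stalled connection from hanging the whole run.
    with requests.get(absolute_url, headers=headers, stream=True, timeout=30) as r:
        r.raise_for_status()
        with open(file_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    return True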