import os
import time
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
import logging
from datetime import datetime
import json
from pathlib import Path
import re
# Import credentials from the optional local ``config`` module.
try:
    from config import CREDENTIALS
except ImportError:
    # No config.py importable: fall back to empty credentials so the module
    # still loads; the downloader class rejects empty values at construction.
    CREDENTIALS = {
        'username': '',
        'password': '',
    }

# Logging setup: every record goes both to a log file in the current working
# directory and to the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Explicit UTF-8: log messages contain non-ASCII characters (check
        # marks, localized directory names) that a legacy default locale
        # encoding cannot represent.
        logging.FileHandler('elektor_download.log', encoding='utf-8'),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)
class ElektroMagazineDownloader:
    """Download Elektor magazine PDFs year by year through a Firefox WebDriver.

    The browser is configured to save PDFs into ``download_dir``; each
    finished file is then moved into a ``download_dir/<year>/`` subfolder
    under a generated name. Cumulative statistics are persisted to
    ``download_state.json`` inside ``download_dir``.
    """

    # A PDF must be larger than this many bytes to count as a real download
    # (smaller files are treated as placeholders or incomplete).
    MIN_PDF_BYTES = 102400  # 100 KB
    # Upper bound for generated filenames, extension included.
    MAX_FILENAME_LENGTH = 100

    def __init__(self, username=None, password=None, download_dir="Elektor_Magazines"):
        """Store credentials, prepare the download directory and load saved state.

        Args:
            username: Account e-mail; falls back to ``CREDENTIALS['username']``.
            password: Account password; falls back to ``CREDENTIALS['password']``.
            download_dir: Target directory (``~`` is expanded).

        Raises:
            ValueError: If no username or no password is available.
        """
        self.username = username or CREDENTIALS.get('username')
        self.password = password or CREDENTIALS.get('password')
        self.download_dir = Path(download_dir).expanduser()
        self.driver = None
        self.session = requests.Session()
        # Statistics
        self.stats = self._default_stats()
        # State file
        self.state_file = self.download_dir / "download_state.json"
        if not self.username or not self.password:
            logger.error("Username or password not provided!")
            raise ValueError("Credentials are required")
        self.setup_directories()
        self.load_state()

    @staticmethod
    def _default_stats():
        """Return a fresh statistics dict containing every key the class reads."""
        return {
            'total_downloaded': 0,
            'total_failed': 0,
            'start_time': None,
            'year_stats': {},
        }

    def setup_directories(self):
        """Create the download directory, including missing parent directories."""
        # parents=True: the configured path may be several levels deep.
        self.download_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Download directory: {self.download_dir}")

    def load_state(self):
        """Load statistics from the state file, if one exists.

        Loaded values are merged over the defaults so that a state file
        missing some keys cannot cause ``KeyError`` later on. An unreadable
        or malformed file is ignored with a warning.
        """
        if not self.state_file.exists():
            return
        try:
            with open(self.state_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and decoding errors.
            logger.warning("Could not load state file")
            return
        if not isinstance(loaded, dict):
            logger.warning("Could not load state file")
            return
        merged = self._default_stats()
        merged.update(loaded)
        if not isinstance(merged.get('year_stats'), dict):
            merged['year_stats'] = {}
        self.stats = merged
        logger.info("Loaded previous download state")

    def save_state(self):
        """Write the current statistics to the state file (best effort)."""
        try:
            with open(self.state_file, 'w', encoding='utf-8') as f:
                json.dump(self.stats, f, indent=2)
        except (OSError, TypeError, ValueError):
            logger.warning("Could not save state file")

    def setup_driver(self):
        """Start Firefox configured to save PDFs into the main download directory.

        Returns:
            True when the driver was created, False on any failure (logged).
        """
        try:
            options = Options()
            # IMPORTANT: downloads land in the main download folder, NOT the
            # year subfolder; download_pdf() moves each file afterwards.
            options.set_preference("browser.download.folderList", 2)
            options.set_preference("browser.download.dir", str(self.download_dir))
            options.set_preference("browser.download.useDownloadDir", True)
            options.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf")
            options.set_preference("pdfjs.disabled", True)
            options.set_preference("browser.download.manager.showWhenStarting", False)
            # Find geckodriver in a few common locations
            geckodriver_paths = [
                "/usr/local/bin/geckodriver",
                "/usr/bin/geckodriver",
                Path.home() / ".local/bin/geckodriver",
            ]
            for path in geckodriver_paths:
                if os.path.exists(path):
                    service = Service(executable_path=str(path))
                    self.driver = webdriver.Firefox(service=service, options=options)
                    break
            else:
                # No known path matched: let Selenium locate the driver itself.
                self.driver = webdriver.Firefox(options=options)
            self.driver.set_window_size(1920, 1080)
            logger.info("Firefox WebDriver ready")
            return True
        except Exception as e:
            logger.error(f"WebDriver setup failed: {e}")
            return False

    def _fill_first_match(self, selectors, text):
        """Type ``text`` into the first element matched by any CSS selector.

        Returns:
            The element that accepted the text, or None if every selector failed.
        """
        for selector in selectors:
            try:
                field = self.driver.find_element(By.CSS_SELECTOR, selector)
                field.send_keys(text)
                return field
            except Exception:
                # Element missing or not interactable: try the next selector.
                continue
        return None

    def login(self):
        """Log in to the Elektor Magazine site with the stored credentials.

        Returns:
            True if the browser left the login page after submitting, else False.
        """
        try:
            logger.info("Logging in...")
            self.driver.get("https://www.elektormagazine.com/account/login")
            time.sleep(5)
            # Email
            email_field = self._fill_first_match(
                ["input[name='email']", "input[type='email']"], self.username
            )
            if email_field is None:
                # Best effort, as before: continue and let the URL check decide.
                logger.warning("Email field not found on login page")
            time.sleep(1)
            # Password
            password_field = self._fill_first_match(
                ["input[name='password']", "input[type='password']"], self.password
            )
            if password_field is None:
                # Without a password field the form cannot be submitted.
                logger.error("Login failed!")
                return False
            time.sleep(1)
            # Submit with the Enter key
            from selenium.webdriver.common.keys import Keys
            password_field.send_keys(Keys.RETURN)
            time.sleep(5)
            # Still on a login URL means the credentials were rejected
            if "login" in self.driver.current_url:
                logger.error("Login failed!")
                return False
            logger.info("Login successful!")
            return True
        except Exception as e:
            logger.error(f"Login error: {e}")
            return False

    @staticmethod
    def _unique_year_links(hrefs, year):
        """Keep hrefs containing ``/magazine/<year>/``, de-duplicated in page order.

        Order is preserved on purpose: the position of a link becomes part of
        the generated filename, so it must be stable between runs.
        """
        marker = f'/magazine/{year}/'
        return list(dict.fromkeys(h for h in hrefs if h and marker in h))

    def get_magazine_links_for_year(self, year):
        """Collect the magazine page links listed on a year's archive page.

        Returns:
            Unique links in page order; an empty list on any error (logged).
        """
        url = f"https://www.elektormagazine.com/magazine-archive/{year}"
        logger.info(f"Scanning year {year}: {url}")
        try:
            self.driver.get(url)
            time.sleep(3)
            hrefs = []
            for link in self.driver.find_elements(By.TAG_NAME, "a"):
                try:
                    hrefs.append(link.get_attribute("href"))
                except Exception:
                    # Element may have gone stale while iterating; skip it.
                    continue
            magazine_links = self._unique_year_links(hrefs, year)
            logger.info(f"Found {len(magazine_links)} magazines for {year}")
            return magazine_links
        except Exception as e:
            logger.error(f"Error scanning year {year}: {e}")
            return []

    def get_download_link(self, magazine_url):
        """Open a magazine page and return its first ``/expose/`` link.

        Returns:
            The link URL, or None when the page has none or an error occurs.
        """
        try:
            logger.debug(f"Getting download link: {magazine_url}")
            self.driver.get(magazine_url)
            time.sleep(3)
            # Use JavaScript to find expose links
            expose_links = self.driver.execute_script("""
var links = document.getElementsByTagName('a');
var exposeLinks = [];
for (var i = 0; i < links.length; i++) {
var href = links[i].href;
if (href && href.includes('/expose/')) {
exposeLinks.push(href);
}
}
return exposeLinks;
""")
            if expose_links:
                logger.info(f"Found expose links via JavaScript: {expose_links}")
                return expose_links[0]
            return None
        except Exception as e:
            logger.error(f"Error getting download link: {e}")
            return None

    @staticmethod
    def _clean_title(raw_title, magazine_url):
        """Turn a page title into a short title usable inside a filename.

        Falls back to ``Issue_<n>`` parsed from the URL, or ``Magazine``, when
        fewer than three characters remain. The result is at most 50 characters.
        """
        clean_title = raw_title.replace('Elektor', '').replace('elektor', '').strip()
        clean_title = re.sub(r'\s+', ' ', clean_title)  # Collapse whitespace runs
        clean_title = re.sub(r'[^\w\s-]', '', clean_title)  # Remove special chars
        if not clean_title or len(clean_title) < 3:
            # Extract the issue number from the URL instead
            match = re.search(r'/magazine/\d+/(\d+)', magazine_url)
            clean_title = f"Issue_{match.group(1)}" if match else "Magazine"
        return clean_title[:50]  # Limit length

    def get_magazine_title(self, magazine_url):
        """Return a cleaned title for the magazine page, for use in filenames.

        Returns:
            The cleaned title, or ``"Magazine"`` if anything goes wrong.
        """
        try:
            # Skip the reload when the browser is already showing this page
            # (the usual case right after get_download_link()).
            if self.driver.current_url != magazine_url:
                self.driver.get(magazine_url)
                time.sleep(2)
            return self._clean_title(self.driver.title or '', magazine_url)
        except Exception as e:
            logger.error(f"Error getting magazine title: {e}")
            return "Magazine"

    @classmethod
    def _build_filename(cls, year, index, magazine_title):
        """Assemble ``<year>_<title>_<index>.pdf`` within the length limit.

        Only the year/title stem is shortened when the name is too long, so
        the index suffix and the ``.pdf`` extension always stay intact.
        """
        stem_parts = [str(year)]
        if magazine_title:
            # Clean title for filename
            clean_title = magazine_title.replace(' ', '_')
            clean_title = re.sub(r'[^\w_-]', '', clean_title)
            stem_parts.append(clean_title)
        stem = "_".join(stem_parts)
        suffix = f"_{index:03d}" if index > 0 else ""
        extension = ".pdf"
        budget = cls.MAX_FILENAME_LENGTH - len(suffix) - len(extension)
        return stem[:max(budget, 0)] + suffix + extension

    def generate_filename(self, year, index, magazine_url, magazine_title=None):
        """Generate the PDF filename for one magazine issue.

        Args:
            year: Publication year (first filename component).
            index: Position of the issue; appended as three digits when > 0.
            magazine_url: Page URL, used to look up the title when needed.
            magazine_title: Optional title; fetched from the page if omitted.

        Returns:
            The filename; ``Elektor_<year>_<index>.pdf`` if generation fails.
        """
        try:
            if not magazine_title:
                magazine_title = self.get_magazine_title(magazine_url)
            filename = self._build_filename(year, index, magazine_title)
            logger.debug(f"Generated filename: {filename}")
            return filename
        except Exception as e:
            logger.error(f"Error generating filename: {e}")
            return f"Elektor_{year}_{index:03d}.pdf"

    def _latest_recent_pdf(self, max_age=60):
        """Return the newest PDF in the main download directory, if recent.

        Only files modified less than ``max_age`` seconds ago are considered.
        Files that disappear while scanning (the browser renames its
        temporary files) are skipped instead of aborting the scan.

        Returns:
            Path of the most recently modified matching file, or None.
        """
        now = time.time()
        newest = None
        newest_mtime = None
        for entry in self.download_dir.iterdir():
            if entry.suffix.lower() != '.pdf':
                continue
            try:
                if not entry.is_file():
                    continue
                mtime = entry.stat().st_mtime
            except OSError:
                continue
            if now - mtime < max_age and (newest_mtime is None or mtime > newest_mtime):
                newest, newest_mtime = entry, mtime
        return newest

    def download_pdf(self, download_url, year, filename):
        """Trigger a browser download and move the PDF into the year folder.

        Args:
            download_url: URL that makes the browser download the PDF.
            year: Year subfolder to store the file in.
            filename: Final filename inside the year folder.

        Returns:
            True if the file already existed or was downloaded and moved;
            False on timeout or error (logged).
        """
        try:
            logger.info(f"Downloading: {filename}")
            # Create year directory
            year_dir = self.download_dir / str(year)
            year_dir.mkdir(parents=True, exist_ok=True)
            year_filepath = year_dir / filename
            # A sufficiently large existing file counts as already downloaded
            if year_filepath.exists() and year_filepath.stat().st_size > self.MIN_PDF_BYTES:
                logger.info(f"File already exists in year directory: {filename}")
                return True
            # Navigate to download URL
            self.driver.get(download_url)
            # Give the download time to start
            time.sleep(10)
            # Poll the MAIN directory for the finished file
            max_wait = 90  # Maximum wait time in seconds
            wait_interval = 5
            waited = 0
            while waited < max_wait:
                latest_pdf = self._latest_recent_pdf()
                if latest_pdf is not None:
                    file_size = latest_pdf.stat().st_size
                    if file_size > self.MIN_PDF_BYTES:
                        # replace() overwrites an existing (too small) target
                        latest_pdf.replace(year_filepath)
                        logger.info(f"Moved {latest_pdf.name} to year directory as {filename}")
                        logger.info(f"Downloaded: {filename} ({file_size:,} bytes)")
                        return True
                # Wait and check again
                time.sleep(wait_interval)
                waited += wait_interval
                logger.debug(f"Waiting for download... ({waited}/{max_wait} seconds)")
            logger.warning(f"Download timed out for: {filename}")
            return False
        except Exception as e:
            logger.error(f"Download error for {filename}: {e}")
            return False

    def process_year(self, year, skip_existing=True):
        """Download every magazine of one year and record the statistics.

        Args:
            year: Year to process.
            skip_existing: Skip issues whose generated filename already
                exists in the year folder (counted as successful).

        Returns:
            Tuple ``(successful, failed)``.

        Raises:
            KeyboardInterrupt: Re-raised after the statistics are saved, so
                an interrupt stops the whole run rather than only this year.
        """
        logger.info(f"\n{'='*60}")
        logger.info(f"PROCESSING YEAR {year}")
        logger.info(f"{'='*60}")
        start_time = time.time()
        # Create year directory
        year_dir = self.download_dir / str(year)
        year_dir.mkdir(parents=True, exist_ok=True)
        # Names of PDFs already present, if skipping
        existing_files = set()
        if skip_existing:
            existing_files = {f.name for f in year_dir.iterdir() if f.suffix.lower() == '.pdf'}
            if existing_files:
                logger.info(f"Found {len(existing_files)} existing PDFs in year directory")
        # Get magazine links
        magazine_links = self.get_magazine_links_for_year(year)
        if not magazine_links:
            logger.warning(f"No magazines found for {year}")
            return 0, 0
        successful = 0
        failed = 0
        interrupted = False
        # Process each magazine
        for i, magazine_url in enumerate(magazine_links, 1):
            try:
                logger.info(f"\n[{year}] Magazine {i}/{len(magazine_links)}")
                logger.info(f"URL: {magazine_url}")
                # Get download link
                download_url = self.get_download_link(magazine_url)
                if not download_url:
                    logger.warning("No download link found")
                    failed += 1
                    continue
                # Generate filename
                filename = self.generate_filename(year, i, magazine_url)
                # Check if file already exists
                if skip_existing and filename in existing_files:
                    logger.info(f"Skipping existing file: {filename}")
                    successful += 1
                    continue
                # Download PDF
                if self.download_pdf(download_url, year, filename):
                    successful += 1
                    logger.info(f"✓ Success: {filename}")
                else:
                    failed += 1
                    logger.error(f"✗ Failed: {filename}")
                # Be nice to the server
                time.sleep(2)
            except KeyboardInterrupt:
                logger.info("Interrupted by user")
                interrupted = True
                break
            except Exception as e:
                logger.error(f"Error processing magazine: {e}")
                failed += 1
                continue
        # Save statistics (also for a partially processed year)
        elapsed = time.time() - start_time
        self.stats['year_stats'][str(year)] = {
            'successful': successful,
            'failed': failed,
            'time': elapsed,
        }
        self.stats['total_downloaded'] += successful
        self.stats['total_failed'] += failed
        self.save_state()
        logger.info(f"\nYear {year} summary:")
        logger.info(f" Successful: {successful}")
        logger.info(f" Failed: {failed}")
        logger.info(f" Time: {elapsed:.1f} seconds")
        if interrupted:
            # Propagate so run() stops instead of starting the next year.
            raise KeyboardInterrupt
        return successful, failed

    def run(self, start_year=1974, end_year=2025, skip_existing=True):
        """Run the whole download: browser setup, login, then each year in turn.

        Args:
            start_year: First year to process (inclusive).
            end_year: Last year to process (inclusive).
            skip_existing: Passed through to process_year().

        Returns:
            True when all years were processed; False on setup/login failure,
            user interrupt or unexpected error. The browser is always closed.
        """
        try:
            print("\n" + "="*60)
            print("ELEKTOR MAGAZINE DOWNLOADER")
            print("="*60)
            print(f"Download directory: {self.download_dir}")
            print(f"Years: {start_year} to {end_year}")
            # Setup
            self.stats['start_time'] = time.time()
            print("\nSetting up browser...")
            if not self.setup_driver():
                return False
            print("Logging in...")
            if not self.login():
                return False
            # Process years
            years = list(range(start_year, end_year + 1))
            total_years = len(years)
            print(f"\nProcessing {total_years} years")
            total_successful = 0
            total_failed = 0
            for i, year in enumerate(years, 1):
                print(f"\n{'#'*60}")
                print(f"YEAR {year} ({i}/{total_years})")
                print(f"{'#'*60}")
                successful, failed = self.process_year(year, skip_existing)
                total_successful += successful
                total_failed += failed
                # Estimated time remaining, from the average time per year so far
                if i < total_years:
                    elapsed = time.time() - self.stats['start_time']
                    avg_time_per_year = elapsed / i
                    remaining_years = total_years - i
                    remaining_time = avg_time_per_year * remaining_years
                    print(f"\nEstimated time remaining: {remaining_time/60:.1f} minutes")
            # Final statistics
            total_time = time.time() - self.stats['start_time']
            print("\n" + "="*60)
            print("DOWNLOAD COMPLETE!")
            print("="*60)
            print(f"Total downloaded: {total_successful}")
            print(f"Total failed: {total_failed}")
            print(f"Total time: {total_time/60:.1f} minutes")
            # Show downloaded files
            print(f"\nDownload location: {self.download_dir}")
            print("\nDownloaded files by year:")
            for year in years:
                year_dir = self.download_dir / str(year)
                if year_dir.exists():
                    pdf_files = list(year_dir.glob("*.pdf"))
                    if pdf_files:
                        print(f"\n{year} ({len(pdf_files)} PDFs):")
                        for pdf in sorted(pdf_files)[:3]:  # Show first 3
                            size_mb = pdf.stat().st_size / (1024 * 1024)
                            print(f" • {pdf.name} ({size_mb:.1f} MB)")
                        if len(pdf_files) > 3:
                            print(f" ... and {len(pdf_files) - 3} more")
            return True
        except KeyboardInterrupt:
            print("\n\nProcess interrupted by user!")
            return False
        except Exception as e:
            print(f"\n\nError: {e}")
            import traceback
            traceback.print_exc()
            return False
        finally:
            if self.driver:
                self.driver.quit()
                # Drop the reference so a dead driver is never reused.
                self.driver = None
                print("\nBrowser closed.")
def _prompt_year(prompt):
    """Read a year from stdin; return it as int, or None if it is not a whole number."""
    raw = input(prompt).strip()
    try:
        return int(raw)
    except ValueError:
        print(f"Invalid year: {raw!r}")
        return None


def main():
    """Show the interactive menu and run the selected action.

    Writes a ``config.py`` template and returns when no such file exists in
    the current directory. Listing existing downloads (option 5) only reads
    the download directory, so it needs neither credentials nor a browser.
    """
    print("\n" + "="*60)
    print("ELEKTOR MAGAZINE DOWNLOADER - FIXED VERSION")
    print("PDFs will be saved in year folders")
    print("="*60)
    # Check config
    if not os.path.exists("config.py"):
        print("\n❌ config.py not found!")
        print("Creating template...")
        template = '''# Elektor Magazine Credentials
CREDENTIALS = {
'username': 'your_email@example.com',
'password': 'your_password_here'
}
'''
        with open("config.py", "w", encoding="utf-8") as f:
            f.write(template)
        print("Please edit config.py with your credentials!")
        return
    # Single definition of the download location used by every option
    download_dir = "~/İndirilenler/E-Kitap/Elektor/Elektor_Magazines"
    # Menu
    print("\n Download Options:")
    print("1. Continue from where we left (skip existing files)")
    print("2. Download specific year")
    print("3. Download year range")
    print("4. Download all years (1974-2025)")
    print("5. Check existing downloads")
    choice = input("\nSelect option (1-5): ").strip()
    if choice == "5":
        # Check existing downloads
        existing_dir = Path(download_dir).expanduser()
        if not existing_dir.exists():
            print("No downloads found.")
            return
        total_pdfs = 0
        print("\n Existing downloads:")
        for year_dir in sorted(existing_dir.iterdir()):
            if not (year_dir.is_dir() and year_dir.name.isdigit()):
                continue
            pdfs = list(year_dir.glob("*.pdf"))
            if pdfs:
                total_pdfs += len(pdfs)
                print(f"\n{year_dir.name}: {len(pdfs)} PDFs")
                for pdf in sorted(pdfs)[:2]:
                    size_mb = pdf.stat().st_size / (1024 * 1024)
                    print(f" • {pdf.name} ({size_mb:.1f} MB)")
        print(f"\nTotal PDFs: {total_pdfs}")
        return
    if choice not in {"1", "2", "3", "4"}:
        print("Invalid choice.")
        return
    # Only download options reach this point, so credentials are required now
    downloader = ElektroMagazineDownloader(download_dir=download_dir)
    if choice == "1":
        print("\n Continuing download (will skip existing files)...")
        downloader.run(skip_existing=True)
    elif choice == "2":
        year = _prompt_year("Enter year: ")
        if year is None:
            return
        print(f"\n Downloading year {year}...")
        downloader.run(start_year=year, end_year=year, skip_existing=True)
    elif choice == "3":
        start = _prompt_year("Start year: ")
        if start is None:
            return
        end = _prompt_year("End year: ")
        if end is None:
            return
        if start > end:
            # A reversed range would silently process nothing; swap instead.
            start, end = end, start
        print(f"\n Downloading years {start}-{end}...")
        downloader.run(start_year=start, end_year=end, skip_existing=True)
    else:
        print("\n Downloading ALL years 1974-2025...")
        print("This will download ~600 magazines!")
        confirm = input("Are you sure? (yes/no): ").strip().lower()
        if confirm == 'yes':
            downloader.run(skip_existing=True)
        else:
            print("Download cancelled.")
if __name__ == "__main__":
    # Local import: only the script entry point needs sys.
    import sys

    # Script entry point: report failures to the user AND to the calling
    # shell through a non-zero exit status.
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n Program terminated.")
        sys.exit(130)  # conventional status for termination by Ctrl+C
    except Exception as e:
        print(f"\n\n Error: {e}")
        sys.exit(1)