WebScraping/src/browser_window.py
2025-09-25 15:38:10 +02:00

156 lines
6.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# src/browser_window.py
import io
import tempfile
from pathlib import Path

from PySide6.QtCore import QBuffer, QIODevice, QUrl, Qt
from PySide6.QtGui import QAction
from PySide6.QtWebEngineCore import (
    QWebEngineContextMenuRequest,
    QWebEngineDownloadRequest,
)
from PySide6.QtWebEngineWidgets import QWebEngineView
from PySide6.QtWidgets import (
    QMainWindow, QToolBar, QMenu, QMessageBox, QFileDialog, QInputDialog, QLineEdit
)

from db import init_db, save_page, save_screenshot, save_tag, save_event
class BrowserWindow(QMainWindow):
    """Main window: an embedded web view with capture and tagging tools.

    The HTML of every successfully loaded page is stored through
    ``save_page``; the value it returns is kept in ``current_page_id`` and
    passed to ``save_screenshot`` for screenshots and tagged images.
    """

    def __init__(self):
        """Build the UI, connect the signals and initialise the database."""
        super().__init__()
        self.setWindowTitle("Law Enforcement Web Scraper")
        self.resize(1200, 800)

        # ---- State ---------------------------------------------------
        # Created before any signal is connected so that every slot can
        # rely on these attributes existing.
        self.current_page_id = None
        self.pending_url = ""
        # URLs of images whose download was started by tag_image(); used
        # by on_download_requested() to route them away from the dialog.
        self._pending_image_urls = set()
        init_db()

        # ---- Web view ------------------------------------------------
        self.view = QWebEngineView()
        self.setCentralWidget(self.view)

        # ---- Toolbar -------------------------------------------------
        toolbar = QToolBar()
        self.addToolBar(toolbar)
        # Plain-text labels: an empty label renders as a blank button.
        toolbar_actions = (
            ("Back", self.view.back),
            ("Forward", self.view.forward),
            ("Reload", self.view.reload),
            ("📸 Capture", self.capture_screenshot),
        )
        for label, slot in toolbar_actions:
            action = QAction(label, self)
            action.triggered.connect(slot)
            toolbar.addAction(action)

        # ---- URL input field and Go button ---------------------------
        self.url_input = QLineEdit()
        self.url_input.setPlaceholderText("Enter URL")
        toolbar.addWidget(self.url_input)
        go_act = QAction("Go", self)
        go_act.triggered.connect(self.go_to_url)
        toolbar.addAction(go_act)

        # ---- Signals -------------------------------------------------
        self.view.urlChanged.connect(self.on_url_changed)
        self.view.loadFinished.connect(self.on_load_finished)
        self.view.page().profile().downloadRequested.connect(
            self.on_download_requested
        )
        # Context-menu handling for image tagging
        self.view.setContextMenuPolicy(Qt.ContextMenuPolicy.CustomContextMenu)
        self.view.customContextMenuRequested.connect(self.show_context_menu)

    # ------------------------------------------------------------------
    def go_to_url(self):
        """Navigate the view to the URL typed in the toolbar."""
        url_text = self.url_input.text().strip()
        if url_text:
            # fromUserInput() adds a scheme to input such as "example.com",
            # which a plain QUrl(...) would treat as a relative URL.
            self.view.load(QUrl.fromUserInput(url_text))

    def on_url_changed(self, url: QUrl):
        """Remember the URL; HTML will be saved once the page finishes loading."""
        self.pending_url = url.toString()

    def on_load_finished(self, ok: bool):
        """Store the page HTML after a successful load, or warn on failure."""
        if not ok:
            QMessageBox.warning(
                self, "Load error", f"Failed to load {self.pending_url}"
            )
            return
        # toHtml() is asynchronous; the HTML arrives in the callback.
        self.view.page().toHtml(self._store_page)

    def _store_page(self, html: str):
        """Save the page record and keep its id for later screenshots."""
        self.current_page_id = save_page(self.pending_url, html)
        # Optional: automatically take a screenshot on load
        # self.capture_screenshot()

    # ------------------------------------------------------------------
    def capture_screenshot(self):
        """Grab the view as a PNG, store it and ask the user for a tag."""
        if self.current_page_id is None:
            QMessageBox.information(self, "Info", "No page loaded yet.")
            return

        # QWidget.grab() is synchronous and returns a QPixmap directly.
        pixmap = self.view.grab()

        # QPixmap.save() needs a QIODevice (or a file name); a Python
        # io.BytesIO is not accepted, so encode through a QBuffer.
        buffer = QBuffer()
        buffer.open(QIODevice.OpenModeFlag.WriteOnly)
        encoded = pixmap.save(buffer, "PNG")
        buffer.close()
        if not encoded:
            QMessageBox.warning(
                self, "Capture error", "Could not encode the screenshot as PNG."
            )
            return

        png_data = buffer.data().data()
        screenshot_id = save_screenshot(self.current_page_id, png_data)
        self.prompt_tag(screenshot_id)

    # ------------------------------------------------------------------
    def show_context_menu(self, pos):
        """Handle a context-menu request on the view (currently a no-op).

        TODO: offer an "Add tag to image" entry that calls tag_image().
        NOTE(review): under Qt 6 the request is presumably obtained from
        self.view.lastContextMenuRequest() - confirm before enabling.
        """
        pass

    def tag_image(self, ctx: QWebEngineContextMenuRequest):
        """Start downloading the image under the cursor so it can be tagged."""
        img_url = ctx.mediaUrl()
        # Register the URL first so on_download_requested() recognises the
        # request and hands it to _handle_image_download().
        # NOTE(review): assumes the request reports the same URL string
        # (no redirect) - confirm.
        self._pending_image_urls.add(img_url.toString())
        self.view.page().download(img_url)

    def _handle_image_download(self, request, expected_url):
        """Accept an image download into a private temporary directory."""
        request.setDownloadDirectory(tempfile.mkdtemp(prefix="tagged_image_"))
        # Connect before accepting so the completion cannot be missed.
        request.isFinishedChanged.connect(
            lambda: self._store_image_tag(request, expected_url)
        )
        request.accept()

    def _store_image_tag(self, request, url):
        """Store a finished image download as a screenshot and tag it."""
        if not request.isFinished():
            return

        image_path = Path(request.downloadDirectory()) / request.downloadFileName()
        completed = (
            request.state()
            == QWebEngineDownloadRequest.DownloadState.DownloadCompleted
        )
        data = None
        if completed:
            try:
                data = image_path.read_bytes()
            except OSError:
                data = None  # reported to the user below

        # Best-effort cleanup; rmdir() only removes an empty directory, so
        # nothing besides the downloaded file can be deleted here.
        try:
            image_path.unlink(missing_ok=True)
            image_path.parent.rmdir()
        except OSError:
            pass

        if data is None:
            QMessageBox.warning(
                self, "Download error", f"Failed to download {url}"
            )
            return
        if self.current_page_id is not None:
            screenshot_id = save_screenshot(self.current_page_id, data)
            self.prompt_tag(screenshot_id)

    def prompt_tag(self, screenshot_id: int):
        """Ask for a tag and, if one was entered, save it for the screenshot."""
        tag, ok = QInputDialog.getText(self, "Tag image", "Enter tag:")
        tag = tag.strip()
        if ok and tag:
            save_tag(screenshot_id, tag)
            QMessageBox.information(self, "Tagged", f"Tag saved: {tag}")

    # ------------------------------------------------------------------
    def on_download_requested(self, request):
        """Route a download: tagged images internally, the rest to a dialog."""
        source_url = request.url().toString()
        if source_url in self._pending_image_urls:
            self._pending_image_urls.discard(source_url)
            self._handle_image_download(request, source_url)
            return

        # For non-image files let the user choose a location.
        chosen_path, _ = QFileDialog.getSaveFileName(
            self, "Save file", request.suggestedFileName()
        )
        if not chosen_path:
            # Dialog dismissed: the request is left unaccepted.
            # NOTE(review): Qt is expected to cancel such requests - confirm.
            return
        target = Path(chosen_path)
        # Apply directory and name; the name alone would ignore the
        # directory the user picked in the dialog.
        request.setDownloadDirectory(str(target.parent))
        request.setDownloadFileName(target.name)
        request.accept()