Open navigation menu

Scribd

0% found this document useful (0 votes)

45 views2 pages

From Selenium Import Webdriver

web scraping

Uploaded by

Thai Jean jacques

Copyright

© All Rights Reserved

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

45 views2 pages

From Selenium Import Webdriver

web scraping

Uploaded by

Thai Jean jacques

Copyright

© All Rights Reserved

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

from selenium import webdriver
from selenium.webdriver.firefox.service import Service as FirefoxService
from webdriver_manager.firefox import GeckoDriverManager
from bs4 import BeautifulSoup

# Script: open a page in an automated Firefox session, dump its HTML, then
# print the headers and rows of one table selected by its CSS classes.

# Location of the manually downloaded GeckoDriver.
# NOTE(review): Service() expects the path of the geckodriver executable; this
# value looks like the name of the extracted folder -- confirm that it points
# at the binary itself.
geckodriver_path = 'C:\\Users\\thaij\\Desktop\\geckodriver-v0.34.0-win32'

# URL of the HTML page to fetch.
# TODO: the real address was replaced by a "[Link]" placeholder when this
# script was extracted; set the actual page URL here before running.
url = "[Link]"

# Start Firefox through the driver at the path given above.
driver = webdriver.Firefox(service=FirefoxService(geckodriver_path))

try:
    # Open the URL in the automated browser.
    driver.get(url)

    # Set the implicit wait (seconds) applied to WebDriver element lookups;
    # adjust as needed.
    # NOTE(review): this does not delay page_source below. If the table is
    # rendered by JavaScript after load, an explicit wait is needed instead.
    driver.implicitly_wait(10)

    # Grab the HTML of the page as the browser currently sees it.
    html_content = driver.page_source

    # Parse the HTML with BeautifulSoup.
    soup = BeautifulSoup(html_content, 'html.parser')

    # Print the HTML in indented form.
    print(soup.prettify())

    # Find the table by its class attribute (adjust to the real HTML).
    table = soup.find('table', {'class': 'table table-striped table-hover'})

    if table is not None:
        # Collect the text of every header cell in the table.
        headers = [th.get_text(strip=True) for th in table.find_all('th')]
        print("Headers:", headers)

        # Collect the cell texts of every row except the first (header) row.
        rows = [
            [td.get_text(strip=True) for td in tr.find_all('td')]
            for tr in table.find_all('tr')[1:]
        ]

        for row in rows:
            print(row)
    else:
        print("Tableau non trouvé.")
finally:
    # Always close the automated browser, even if scraping failed, so no
    # Firefox/geckodriver process is left running.
    driver.quit()
from selenium import webdriver
from selenium.webdriver.firefox.service import Service as FirefoxService
from webdriver_manager.firefox import GeckoDriverManager
from bs4 import BeautifulSoup

# Script: open a page in an automated Firefox session and print the headers
# and rows of one table selected by its CSS classes. GeckoDriver is obtained
# through webdriver_manager instead of a hard-coded path.

# URL of the HTML page to fetch.
# TODO: the real address was replaced by a "[Link]" placeholder when this
# script was extracted; set the actual page URL here before running.
url = "[Link]"

# Let GeckoDriverManager provide the driver and use the path it returns.
geckodriver_path = GeckoDriverManager().install()
driver = webdriver.Firefox(service=FirefoxService(geckodriver_path))

try:
    # Open the URL in the automated browser.
    driver.get(url)

    # Set the implicit wait (seconds) applied to WebDriver element lookups;
    # adjust as needed.
    # NOTE(review): this does not delay page_source below. If the table is
    # rendered by JavaScript after load, an explicit wait is needed instead.
    driver.implicitly_wait(10)

    # Grab the HTML of the page as the browser currently sees it.
    html_content = driver.page_source

    # Parse the HTML with BeautifulSoup.
    soup = BeautifulSoup(html_content, 'html.parser')

    # Debug aid: uncomment to print the HTML in indented form.
    # print(soup.prettify())

    # Find the table by its class attribute (adjust to the real HTML).
    table = soup.find('table', {'class': 'table table-striped table-hover'})

    if table is not None:
        # Collect the text of every header cell in the table.
        headers = [th.get_text(strip=True) for th in table.find_all('th')]
        print("Headers:", headers)

        # Collect the cell texts of every row except the first (header) row.
        rows = [
            [td.get_text(strip=True) for td in tr.find_all('td')]
            for tr in table.find_all('tr')[1:]
        ]

        for row in rows:
            print(row)
    else:
        print("Tableau non trouvé.")
finally:
    # Always close the automated browser, even if scraping failed, so no
    # Firefox/geckodriver process is left running.
    driver.quit()

You might also like

Py
No ratings yet
Py
1 page
Final PRJ
No ratings yet
Final PRJ
2 pages
Jun 05 11 - 02 AM
No ratings yet
Jun 05 11 - 02 AM
2 pages
Bls Auto - Py
No ratings yet
Bls Auto - Py
1 page
Clique Sur Clavier
No ratings yet
Clique Sur Clavier
3 pages
G
No ratings yet
G
2 pages
Web Scraping Faculty Data Guide
No ratings yet
Web Scraping Faculty Data Guide
5 pages
Correction Application JSP
No ratings yet
Correction Application JSP
4 pages
SQL Injection Attack Lab Guide
No ratings yet
SQL Injection Attack Lab Guide
4 pages
Comfort Theory in Pediatric Nursing
No ratings yet
Comfort Theory in Pediatric Nursing
9 pages
Remote Guide - Gmail Setup Guide
No ratings yet
Remote Guide - Gmail Setup Guide
10 pages
Onetrust DR Migration
No ratings yet
Onetrust DR Migration
23 pages
Lesson 2 - Detailed Lesson Plan For Cot1
No ratings yet
Lesson 2 - Detailed Lesson Plan For Cot1
5 pages
Question Bank Grade 10
No ratings yet
Question Bank Grade 10
12 pages
Eset
No ratings yet
Eset
7 pages
Introduction to ICT and Web Evolution
No ratings yet
Introduction to ICT and Web Evolution
2 pages
Process Hollowing: Hacker's Code Injection
No ratings yet
Process Hollowing: Hacker's Code Injection
14 pages
Authentication Protocols OAuth, SAML, OpenID Connect Level 2 Admin 2
No ratings yet
Authentication Protocols OAuth, SAML, OpenID Connect Level 2 Admin 2
4 pages
ARAVO Password Reset Guide for Suppliers
No ratings yet
ARAVO Password Reset Guide for Suppliers
5 pages
DM - Module 4
No ratings yet
DM - Module 4
67 pages
BCIH105 Web Designing Lab Batch 2024-28
No ratings yet
BCIH105 Web Designing Lab Batch 2024-28
2 pages
Lauden Chapter 8
No ratings yet
Lauden Chapter 8
17 pages
Azure Application Gateway Features Explained
No ratings yet
Azure Application Gateway Features Explained
3 pages
Assam OBC Caste Certificate
No ratings yet
Assam OBC Caste Certificate
1 page
Cyberoam CR100ia
No ratings yet
Cyberoam CR100ia
2 pages
Optimus Prime Solutions Web Design
No ratings yet
Optimus Prime Solutions Web Design
1 page
Goon International College Security Plan
No ratings yet
Goon International College Security Plan
3 pages
Mapua University Academic Integrity Policy
No ratings yet
Mapua University Academic Integrity Policy
7 pages
SD Circular 03 - Code Lists For Code-5 and Code-7 en
No ratings yet
SD Circular 03 - Code Lists For Code-5 and Code-7 en
3 pages
MEESEVA User Manual For DEPT Ver 1.1-Extract of House Site or D Form Patta PDF
No ratings yet
MEESEVA User Manual For DEPT Ver 1.1-Extract of House Site or D Form Patta PDF
14 pages
Adobe 2013 Breach Analysis
No ratings yet
Adobe 2013 Breach Analysis
18 pages
Course - Computer Networks, Week - UDP
No ratings yet
Course - Computer Networks, Week - UDP
1 page
Cybersecurity & Cryptography Basics
No ratings yet
Cybersecurity & Cryptography Basics
5 pages
CCNP Security Implementing Cisco Threat Control So
No ratings yet
CCNP Security Implementing Cisco Threat Control So
24 pages
Digital Marketing Strategies 12.11.2024
No ratings yet
Digital Marketing Strategies 12.11.2024
2 pages
Watch Evryleaked Now Zeledgi45
No ratings yet
Watch Evryleaked Now Zeledgi45
5 pages
Mobile X-Ray WiFi Issue Resolved
No ratings yet
Mobile X-Ray WiFi Issue Resolved
6 pages
SAP Fiori Launchpad for Portals
No ratings yet
SAP Fiori Launchpad for Portals
20 pages