0% found this document useful (0 votes)
13 views9 pages

Python Ipynb

The document contains code for web scraping data related to the V-League 2024, including team statistics such as wins, losses, and points. It uses Selenium to automate the browser and extract information from a specific webpage. The output includes a structured table displaying the teams and their respective performance metrics.

Uploaded by

hoa58582005
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
0% found this document useful (0 votes)
13 views9 pages

Python Ipynb

The document contains code for web scraping data related to the V-League 2024, including team statistics such as wins, losses, and points. It uses Selenium to automate the browser and extract information from a specific webpage. The output includes a structured table displaying the teams and their respective performance metrics.

Uploaded by

hoa58582005
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd

{

"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 2023/2024"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14']\n",
"['Thep Xanh Nam Dinh', 'MerryLand Quy Nhon Binh Dinh', 'Ha Noi', 'Becamex Binh Duong', 'Hai Phong', 'Cong An Ha Noi', 'The Cong - Viettel', 'TP Ho Chi Minh', 'Dong A Thanh Hoa', 'Quang Nam', 'Song Lam Nghe An', 'LPBank Hoang Anh Gia Lai', 'Hong Linh Ha Tinh', 'Khanh Hoa']\n",
"['21', '21', '21', '21', '21', '21', '21', '21', '21', '21', '21', '21', '21', '21']\n",
"['13', '9', '10', '10', '8', '9', '8', '8', '8', '6', '6', '6', '6', '2']\n",
"['3', '7', '3', '3', '8', '4', '6', '5', '5', '8', '7', '7', '6', '4']\n",
"['5', '5', '8', '8', '5', '8', '7', '8', '8', '7', '8', '8', '9', '15']\n",
"['49 - 34', '34 - 24', '32 - 27', '27 - 24', '36 - 28', '32 - 27', '21 - 24', '25 - 24', '31 - 32', '24 - 27', '21 - 25', '18 - 27', '22 - 29', '15 - 35']\n",
"['15', '10', '5', '3', '8', '5', '-3', '1', '-1', '-3', '-4', '-9', '-7', '-20']\n",
"['26', '16', '15', '10', '18', '12', '10', '7', '11', '12', '9', '7', '9', '8']\n",
"['34', '42', '32', '30', '32', '54', '47', '44', '53', '37', '38', '42', '45', '33']\n",
"['2', '0', '2', '0', '1', '4', '2', '2', '5', '2', '2', '1', '2', '0']\n",
"['42', '34', '33', '33', '32', '31', '30', '29', '29', '26', '25', '25', '24', '10']\n"
]
}
],
"source": [
"from selenium import webdriver\n",
"from selenium.webdriver.common.by import By\n",
"from selenium.webdriver.chrome.service import Service as ChromeService\n",
"from selenium.webdriver.support import expected_conditions as EC\n",
"from selenium.webdriver.support.ui import WebDriverWait\n",
"from webdriver_manager.chrome import ChromeDriverManager\n",
"from unidecode import unidecode\n",
"import time\n",
"\n",
"# Absolute XPath of the standings table body; every data cell is tr[row]/td[col] beneath it.\n",
"TABLE_BODY_XPATH = \"/html/body/div[6]/div[2]/div/div[2]/div/div/article/div[2]/div/div/div[4]/div[3]/div/div/div/div[1]/table/tbody\"\n",
"N_ROWS = 14  # rows tr[1] .. tr[14] are read\n",
"N_COLS = 12  # cells td[1] .. td[12] are read from every row\n",
"WAIT_TIMEOUT_S = 15  # upper bound on waiting for the table to render\n",
"\n",
"\n",
"def convert_to_unsigned_vietnamese(text_list):\n",
"    \"\"\"Return a new list with unidecode() applied to every string in text_list.\"\"\"\n",
"    return [unidecode(text) for text in text_list]\n",
"\n",
"\n",
"def cell_xpath(row, col):\n",
"    \"\"\"Build the XPath of the 1-based (row, col) cell of the standings table.\"\"\"\n",
"    return f\"{TABLE_BODY_XPATH}/tr[{row}]/td[{col}]\"\n",
"\n",
"\n",
"# One list per table column, filled in on-page order (td[1] .. td[12]).\n",
"# The variable names are kept unchanged because the next cell reads them by name.\n",
"columns = [[] for _ in range(N_COLS)]\n",
"vitri, name, tran, Thang, Hoa, Thua, B, BT_BB, HS, btsk, Td, Diem = columns\n",
"\n",
"# Page to scrape\n",
"url = \"https://vpf.vn/season/v-league-2024/?fbclid=IwZXh0bgNhZW0CMTAAAR3mtoXg7ktZgKAePwfoOtZ6ilEJyq1glczZhzgyI5S5KVk2UDCk-yLZlUQ_aem_AXQHjYRIC324Pa3CCtK4E4Rg0WdPaJTzzJ6hIy3guN-fCsuZjo8_P5cRtSzCuyFUof7eWnPG2ttpepZbf_JjOt4k\"\n",
"\n",
"# Set up the browser and start the webdriver\n",
"driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))\n",
"try:\n",
"    driver.get(url)\n",
"\n",
"    # Wait until the last cell that will be read is visible, rather than sleeping a fixed time\n",
"    WebDriverWait(driver, WAIT_TIMEOUT_S).until(\n",
"        EC.visibility_of_element_located((By.XPATH, cell_xpath(N_ROWS, N_COLS)))\n",
"    )\n",
"\n",
"    # Walk the table row by row and append each cell's text to its column list\n",
"    for i in range(1, N_ROWS + 1):\n",
"        for col, values in enumerate(columns, start=1):\n",
"            element = driver.find_element(By.XPATH, cell_xpath(i, col))\n",
"            values.append(element.text)\n",
"finally:\n",
"    # Always close the browser, even if a lookup above raises\n",
"    driver.quit()\n",
"\n",
"# Convert accented Vietnamese team names to unaccented text\n",
"Tenclb = convert_to_unsigned_vietnamese(name)\n",
"\n",
"# Print the scraped data, one column list per line\n",
"for values in (vitri, Tenclb, tran, Thang, Hoa, Thua, B, BT_BB, HS, btsk, Td, Diem):\n",
"    print(values)\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Vị trí</th>\n",
" <th>Đội bóng</th>\n",
" <th>Trận</th>\n",
" <th>Thắng</th>\n",
" <th>Hòa</th>\n",
" <th>Thua</th>\n",
" <th>BT_BB</th>\n",
" <th>Hiệu số</th>\n",
" <th>BTSK</th>\n",
" <th>Thẻ Vàng</th>\n",
" <th>Thẻ đỏ</th>\n",
" <th>Điểm</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Thep Xanh Nam Dinh</td>\n",
" <td>21</td>\n",
" <td>13</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>49 - 34</td>\n",
" <td>15</td>\n",
" <td>26</td>\n",
" <td>34</td>\n",
" <td>2</td>\n",
" <td>42</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>MerryLand Quy Nhon Binh Dinh</td>\n",
" <td>21</td>\n",
" <td>9</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>34 - 24</td>\n",
" <td>10</td>\n",
" <td>16</td>\n",
" <td>42</td>\n",
" <td>0</td>\n",
" <td>34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Ha Noi</td>\n",
" <td>21</td>\n",
" <td>10</td>\n",
" <td>3</td>\n",
" <td>8</td>\n",
" <td>32 - 27</td>\n",
" <td>5</td>\n",
" <td>15</td>\n",
" <td>32</td>\n",
" <td>2</td>\n",
" <td>33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>Becamex Binh Duong</td>\n",
" <td>21</td>\n",
" <td>10</td>\n",
" <td>3</td>\n",
" <td>8</td>\n",
" <td>27 - 24</td>\n",
" <td>3</td>\n",
" <td>10</td>\n",
" <td>30</td>\n",
" <td>0</td>\n",
" <td>33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>Hai Phong</td>\n",
" <td>21</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>5</td>\n",
" <td>36 - 28</td>\n",
" <td>8</td>\n",
" <td>18</td>\n",
" <td>32</td>\n",
" <td>1</td>\n",
" <td>32</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6</td>\n",
" <td>Cong An Ha Noi</td>\n",
" <td>21</td>\n",
" <td>9</td>\n",
" <td>4</td>\n",
" <td>8</td>\n",
" <td>32 - 27</td>\n",
" <td>5</td>\n",
" <td>12</td>\n",
" <td>54</td>\n",
" <td>4</td>\n",
" <td>31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7</td>\n",
" <td>The Cong - Viettel</td>\n",
" <td>21</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>21 - 24</td>\n",
" <td>-3</td>\n",
" <td>10</td>\n",
" <td>47</td>\n",
" <td>2</td>\n",
" <td>30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8</td>\n",
" <td>TP Ho Chi Minh</td>\n",
" <td>21</td>\n",
" <td>8</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>25 - 24</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>44</td>\n",
" <td>2</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9</td>\n",
" <td>Dong A Thanh Hoa</td>\n",
" <td>21</td>\n",
" <td>8</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>31 - 32</td>\n",
" <td>-1</td>\n",
" <td>11</td>\n",
" <td>53</td>\n",
" <td>5</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>10</td>\n",
" <td>Quang Nam</td>\n",
" <td>21</td>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>24 - 27</td>\n",
" <td>-3</td>\n",
" <td>12</td>\n",
" <td>37</td>\n",
" <td>2</td>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>11</td>\n",
" <td>Song Lam Nghe An</td>\n",
" <td>21</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>21 - 25</td>\n",
" <td>-4</td>\n",
" <td>9</td>\n",
" <td>38</td>\n",
" <td>2</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>12</td>\n",
" <td>LPBank Hoang Anh Gia Lai</td>\n",
" <td>21</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>18 - 27</td>\n",
" <td>-9</td>\n",
" <td>7</td>\n",
" <td>42</td>\n",
" <td>1</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>13</td>\n",
" <td>Hong Linh Ha Tinh</td>\n",
" <td>21</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>9</td>\n",
" <td>22 - 29</td>\n",
" <td>-7</td>\n",
" <td>9</td>\n",
" <td>45</td>\n",
" <td>2</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>14</td>\n",
" <td>Khanh Hoa</td>\n",
" <td>21</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>15</td>\n",
" <td>15 - 35</td>\n",
" <td>-20</td>\n",
" <td>8</td>\n",
" <td>33</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Vị trí Đội bóng Trận Thắng Hòa Thua BT_BB Hiệu số \\\n",
"0 1 Thep Xanh Nam Dinh 21 13 3 5 49 - 34 15 \n",
"1 2 MerryLand Quy Nhon Binh Dinh 21 9 7 5 34 - 24 10 \n",
"2 3 Ha Noi 21 10 3 8 32 - 27 5 \n",
"3 4 Becamex Binh Duong 21 10 3 8 27 - 24 3 \n",
"4 5 Hai Phong 21 8 8 5 36 - 28 8 \n",
"5 6 Cong An Ha Noi 21 9 4 8 32 - 27 5 \n",
"6 7 The Cong - Viettel 21 8 6 7 21 - 24 -3 \n",
"7 8 TP Ho Chi Minh 21 8 5 8 25 - 24 1 \n",
"8 9 Dong A Thanh Hoa 21 8 5 8 31 - 32 -1 \n",
"9 10 Quang Nam 21 6 8 7 24 - 27 -3 \n",
"10 11 Song Lam Nghe An 21 6 7 8 21 - 25 -4 \n",
"11 12 LPBank Hoang Anh Gia Lai 21 6 7 8 18 - 27 -9 \n",
"12 13 Hong Linh Ha Tinh 21 6 6 9 22 - 29 -7 \n",
"13 14 Khanh Hoa 21 2 4 15 15 - 35 -20 \n",
"\n",
" BTSK Thẻ Vàng Thẻ đỏ Điểm \n",
"0 26 34 2 42 \n",
"1 16 42 0 34 \n",
"2 15 32 2 33 \n",
"3 10 30 0 33 \n",
"4 18 32 1 32 \n",
"5 12 54 4 31 \n",
"6 10 47 2 30 \n",
"7 7 44 2 29 \n",
"8 11 53 5 29 \n",
"9 12 37 2 26 \n",
"10 9 38 2 25 \n",
"11 7 42 1 25 \n",
"12 9 45 2 24 \n",
"13 8 33 0 10 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Pair each output column label with the list scraped for that table position.\n",
"# A dict keeps label and list side by side, and pandas raises if the lists differ\n",
"# in length, where zip() would silently drop the trailing rows.\n",
"# Note: from 'BT_BB' to 'Thẻ đỏ' the variable names are offset from the labels.\n",
"df = pd.DataFrame({\n",
"    \"Vị trí\": vitri,\n",
"    'Đội bóng': Tenclb,\n",
"    'Trận': tran,\n",
"    'Thắng': Thang,\n",
"    'Hòa': Hoa,\n",
"    'Thua': Thua,\n",
"    'BT_BB': B,\n",
"    'Hiệu số': BT_BB,\n",
"    'BTSK': HS,\n",
"    \"Thẻ Vàng\": btsk,\n",
"    'Thẻ đỏ': Td,\n",
"    'Điểm': Diem,\n",
"})\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Export the standings DataFrame to CSV, leaving out the index column\n",
"csv_path = '20232024.csv'\n",
"df.to_csv(csv_path, index=False)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

You might also like