Thanks for visiting codestin.com
Credit goes to www.scribd.com

0% found this document useful (0 votes)
10 views9 pages

Python Ipynb

The document contains code for web scraping data related to the V-League 2024, including team statistics such as wins, losses, and points. It uses Selenium to automate the browser and extract information from a specific webpage. The output includes a structured table displaying the teams and their respective performance metrics.

Uploaded by

hoa58582005
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
0% found this document useful (0 votes)
10 views9 pages

Python Ipynb

The document contains code for web scraping data related to the V-League 2024, including team statistics such as wins, losses, and points. It uses Selenium to automate the browser and extract information from a specific webpage. The output includes a structured table displaying the teams and their respective performance metrics.

Uploaded by

hoa58582005
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
You are on page 1/ 9

{

"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# V-League 2023/2024 standings"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14']\n",
"['Thep Xanh Nam Dinh', 'MerryLand Quy Nhon Binh Dinh', 'Ha Noi', 'Becamex Binh Duong', 'Hai Phong', 'Cong An Ha Noi', 'The Cong - Viettel', 'TP Ho Chi Minh', 'Dong A Thanh Hoa', 'Quang Nam', 'Song Lam Nghe An', 'LPBank Hoang Anh Gia Lai', 'Hong Linh Ha Tinh', 'Khanh Hoa']\n",
"['21', '21', '21', '21', '21', '21', '21', '21', '21', '21', '21', '21', '21', '21']\n",
"['13', '9', '10', '10', '8', '9', '8', '8', '8', '6', '6', '6', '6', '2']\n",
"['3', '7', '3', '3', '8', '4', '6', '5', '5', '8', '7', '7', '6', '4']\n",
"['5', '5', '8', '8', '5', '8', '7', '8', '8', '7', '8', '8', '9', '15']\n",
"['49 - 34', '34 - 24', '32 - 27', '27 - 24', '36 - 28', '32 - 27', '21 - 24', '25 - 24', '31 - 32', '24 - 27', '21 - 25', '18 - 27', '22 - 29', '15 - 35']\n",
"['15', '10', '5', '3', '8', '5', '-3', '1', '-1', '-3', '-4', '-9', '-7', '-20']\n",
"['26', '16', '15', '10', '18', '12', '10', '7', '11', '12', '9', '7', '9', '8']\n",
"['34', '42', '32', '30', '32', '54', '47', '44', '53', '37', '38', '42', '45', '33']\n",
"['2', '0', '2', '0', '1', '4', '2', '2', '5', '2', '2', '1', '2', '0']\n",
"['42', '34', '33', '33', '32', '31', '30', '29', '29', '26', '25', '25', '24', '10']\n"
]
}
],
"source": [
"import time\n",
"\n",
"from selenium import webdriver\n",
"from selenium.webdriver.chrome.service import Service as ChromeService\n",
"from selenium.webdriver.common.by import By\n",
"from selenium.webdriver.support import expected_conditions as EC\n",
"from selenium.webdriver.support.ui import WebDriverWait\n",
"from unidecode import unidecode\n",
"from webdriver_manager.chrome import ChromeDriverManager\n",
"\n",
"\n",
"def convert_to_unsigned_vietnamese(text_list):\n",
"    \"\"\"Return a new list with each string of ``text_list`` passed through ``unidecode``.\n",
"\n",
"    Used in this notebook to turn Vietnamese text with diacritics into its\n",
"    unaccented form. The input list is not modified.\n",
"    \"\"\"\n",
"    return list(map(unidecode, text_list))\n",
"\n",
"# Standings page for the season. The Facebook click-tracking parameter (fbclid)\n",
"# carried by the original link is dropped: it is not needed to address the page.\n",
"url = 'https://vpf.vn/season/v-league-2024/'\n",
"\n",
"# Absolute XPath of one table cell; {row} and {col} are 1-based indices.\n",
"# NOTE(review): an absolute path breaks as soon as the page layout changes.\n",
"CELL_XPATH = (\n",
"    '/html/body/div[6]/div[2]/div/div[2]/div/div/article/div[2]/div/div/div[4]/'\n",
"    'div[3]/div/div/div/div[1]/table/tbody/tr[{row}]/td[{col}]'\n",
")\n",
"N_ROWS = 14  # rows read: tr[1] .. tr[14]\n",
"N_COLS = 12  # cells read per row: td[1] .. td[12]\n",
"PAGE_LOAD_TIMEOUT_S = 15  # longest time to wait for the table before giving up\n",
"\n",
"# Start Chrome with a driver binary resolved by webdriver-manager.\n",
"driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))\n",
"try:\n",
"    driver.get(url)\n",
"\n",
"    # Wait for the last cell that will be read instead of sleeping a fixed time;\n",
"    # WebDriverWait raises TimeoutException if the cell never appears.\n",
"    WebDriverWait(driver, PAGE_LOAD_TIMEOUT_S).until(\n",
"        EC.presence_of_element_located(\n",
"            (By.XPATH, CELL_XPATH.format(row=N_ROWS, col=N_COLS))\n",
"        )\n",
"    )\n",
"\n",
"    # table_columns[k] collects the text of td[k + 1] for every row, top to bottom.\n",
"    table_columns = [[] for _ in range(N_COLS)]\n",
"    for row in range(1, N_ROWS + 1):\n",
"        for col in range(1, N_COLS + 1):\n",
"            cell = driver.find_element(By.XPATH, CELL_XPATH.format(row=row, col=col))\n",
"            table_columns[col - 1].append(cell.text)\n",
"finally:\n",
"    # Close the browser even when a lookup fails, so no Chrome process is left behind.\n",
"    driver.quit()\n",
"\n",
"# One list per table column (td[1] .. td[12]). The names are kept unchanged because\n",
"# the DataFrame cell reads them. NOTE: from B onward the names are shifted relative\n",
"# to the labels used in that cell (B -> 'BT_BB', BT_BB -> 'Hiệu số', HS -> 'BTSK',\n",
"# btsk -> 'Thẻ Vàng', Td -> 'Thẻ đỏ').\n",
"vitri, name, tran, Thang, Hoa, Thua, B, BT_BB, HS, btsk, Td, Diem = table_columns\n",
"\n",
"# Convert the club names from Vietnamese with diacritics to their unaccented form.\n",
"Tenclb = convert_to_unsigned_vietnamese(name)\n",
"\n",
"# Print the scraped data, one list per line.\n",
"for column_values in (vitri, Tenclb, tran, Thang, Hoa, Thua, B, BT_BB, HS, btsk, Td, Diem):\n",
"    print(column_values)\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Vị trí</th>\n",
" <th>Đội bóng</th>\n",
" <th>Trận</th>\n",
" <th>Thắng</th>\n",
" <th>Hòa</th>\n",
" <th>Thua</th>\n",
" <th>BT_BB</th>\n",
" <th>Hiệu số</th>\n",
" <th>BTSK</th>\n",
" <th>Thẻ Vàng</th>\n",
" <th>Thẻ đỏ</th>\n",
" <th>Điểm</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Thep Xanh Nam Dinh</td>\n",
" <td>21</td>\n",
" <td>13</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>49 - 34</td>\n",
" <td>15</td>\n",
" <td>26</td>\n",
" <td>34</td>\n",
" <td>2</td>\n",
" <td>42</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>MerryLand Quy Nhon Binh Dinh</td>\n",
" <td>21</td>\n",
" <td>9</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>34 - 24</td>\n",
" <td>10</td>\n",
" <td>16</td>\n",
" <td>42</td>\n",
" <td>0</td>\n",
" <td>34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Ha Noi</td>\n",
" <td>21</td>\n",
" <td>10</td>\n",
" <td>3</td>\n",
" <td>8</td>\n",
" <td>32 - 27</td>\n",
" <td>5</td>\n",
" <td>15</td>\n",
" <td>32</td>\n",
" <td>2</td>\n",
" <td>33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>Becamex Binh Duong</td>\n",
" <td>21</td>\n",
" <td>10</td>\n",
" <td>3</td>\n",
" <td>8</td>\n",
" <td>27 - 24</td>\n",
" <td>3</td>\n",
" <td>10</td>\n",
" <td>30</td>\n",
" <td>0</td>\n",
" <td>33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>Hai Phong</td>\n",
" <td>21</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>5</td>\n",
" <td>36 - 28</td>\n",
" <td>8</td>\n",
" <td>18</td>\n",
" <td>32</td>\n",
" <td>1</td>\n",
" <td>32</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6</td>\n",
" <td>Cong An Ha Noi</td>\n",
" <td>21</td>\n",
" <td>9</td>\n",
" <td>4</td>\n",
" <td>8</td>\n",
" <td>32 - 27</td>\n",
" <td>5</td>\n",
" <td>12</td>\n",
" <td>54</td>\n",
" <td>4</td>\n",
" <td>31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7</td>\n",
" <td>The Cong - Viettel</td>\n",
" <td>21</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>21 - 24</td>\n",
" <td>-3</td>\n",
" <td>10</td>\n",
" <td>47</td>\n",
" <td>2</td>\n",
" <td>30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8</td>\n",
" <td>TP Ho Chi Minh</td>\n",
" <td>21</td>\n",
" <td>8</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>25 - 24</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>44</td>\n",
" <td>2</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9</td>\n",
" <td>Dong A Thanh Hoa</td>\n",
" <td>21</td>\n",
" <td>8</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>31 - 32</td>\n",
" <td>-1</td>\n",
" <td>11</td>\n",
" <td>53</td>\n",
" <td>5</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>10</td>\n",
" <td>Quang Nam</td>\n",
" <td>21</td>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>24 - 27</td>\n",
" <td>-3</td>\n",
" <td>12</td>\n",
" <td>37</td>\n",
" <td>2</td>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>11</td>\n",
" <td>Song Lam Nghe An</td>\n",
" <td>21</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>21 - 25</td>\n",
" <td>-4</td>\n",
" <td>9</td>\n",
" <td>38</td>\n",
" <td>2</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>12</td>\n",
" <td>LPBank Hoang Anh Gia Lai</td>\n",
" <td>21</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>18 - 27</td>\n",
" <td>-9</td>\n",
" <td>7</td>\n",
" <td>42</td>\n",
" <td>1</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>13</td>\n",
" <td>Hong Linh Ha Tinh</td>\n",
" <td>21</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>9</td>\n",
" <td>22 - 29</td>\n",
" <td>-7</td>\n",
" <td>9</td>\n",
" <td>45</td>\n",
" <td>2</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>14</td>\n",
" <td>Khanh Hoa</td>\n",
" <td>21</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>15</td>\n",
" <td>15 - 35</td>\n",
" <td>-20</td>\n",
" <td>8</td>\n",
" <td>33</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Vị trí Đội bóng Trận Thắng Hòa Thua BT_BB Hiệu số \\\n",
"0 1 Thep Xanh Nam Dinh 21 13 3 5 49 - 34 15 \n",
"1 2 MerryLand Quy Nhon Binh Dinh 21 9 7 5 34 - 24 10 \n",
"2 3 Ha Noi 21 10 3 8 32 - 27 5 \n",
"3 4 Becamex Binh Duong 21 10 3 8 27 - 24 3 \n",
"4 5 Hai Phong 21 8 8 5 36 - 28 8 \n",
"5 6 Cong An Ha Noi 21 9 4 8 32 - 27 5 \n",
"6 7 The Cong - Viettel 21 8 6 7 21 - 24 -3 \n",
"7 8 TP Ho Chi Minh 21 8 5 8 25 - 24 1 \n",
"8 9 Dong A Thanh Hoa 21 8 5 8 31 - 32 -1 \n",
"9 10 Quang Nam 21 6 8 7 24 - 27 -3 \n",
"10 11 Song Lam Nghe An 21 6 7 8 21 - 25 -4 \n",
"11 12 LPBank Hoang Anh Gia Lai 21 6 7 8 18 - 27 -9 \n",
"12 13 Hong Linh Ha Tinh 21 6 6 9 22 - 29 -7 \n",
"13 14 Khanh Hoa 21 2 4 15 15 - 35 -20 \n",
"\n",
" BTSK Thẻ Vàng Thẻ đỏ Điểm \n",
"0 26 34 2 42 \n",
"1 16 42 0 34 \n",
"2 15 32 2 33 \n",
"3 10 30 0 33 \n",
"4 18 32 1 32 \n",
"5 12 54 4 31 \n",
"6 10 47 2 30 \n",
"7 7 44 2 29 \n",
"8 11 53 5 29 \n",
"9 12 37 2 26 \n",
"10 9 38 2 25 \n",
"11 7 42 1 25 \n",
"12 9 45 2 24 \n",
"13 8 33 0 10 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Column label -> scraped list, in display order. The list names come from the\n",
"# scraping cell and do not always match the label they are shown under.\n",
"standings_by_label = {\n",
"    'Vị trí': vitri,\n",
"    'Đội bóng': Tenclb,\n",
"    'Trận': tran,\n",
"    'Thắng': Thang,\n",
"    'Hòa': Hoa,\n",
"    'Thua': Thua,\n",
"    'BT_BB': B,\n",
"    'Hiệu số': BT_BB,\n",
"    'BTSK': HS,\n",
"    'Thẻ Vàng': btsk,\n",
"    'Thẻ đỏ': Td,\n",
"    'Điểm': Diem,\n",
"}\n",
"\n",
"# zip(*...) pairs the lists row by row and stops at the shortest list.\n",
"standings_rows = list(zip(*standings_by_label.values()))\n",
"df = pd.DataFrame(standings_rows, columns=list(standings_by_label))\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Export the DataFrame to a CSV file, leaving out the index column.\n",
"output_csv = '20232024.csv'\n",
"df.to_csv(output_csv, index=False)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

You might also like