# sncb_crawl/main.py
# 2022-08-28 14:57:40 +02:00
#
# 63 lines
# 2.1 KiB
# Python

import asyncio
import aiohttp
import csv
from datetime import datetime
from bs4 import BeautifulSoup
import re
async def main():
    """Crawl the departure board of every station listed in ``stations.csv``.

    Station names are read from the second column of ``stations.csv`` (opened
    relative to the current working directory). One request per station is
    issued concurrently through a single shared ``aiohttp`` session, each
    handled by ``fetch_station``, which appends the train labels it finds to
    a shared list. The collected list is printed at the end.
    """
    # newline='' is the csv module's documented way to open its input, so
    # quoted fields containing line breaks are parsed correctly.
    with open('stations.csv', newline='') as station_file:
        # Blank or single-column rows have no row[1]; skip them instead of
        # aborting the whole crawl with an IndexError.
        stations = [row[1] for row in csv.reader(station_file) if len(row) > 1]

    trains = []
    async with aiohttp.ClientSession() as session:
        # return_exceptions=True keeps one failing station from discarding
        # the results of all the others; failures are reported below.
        outcomes = await asyncio.gather(
            *(fetch_station(station, session, trains) for station in stations),
            return_exceptions=True,
        )

    for station, outcome in zip(stations, outcomes):
        if isinstance(outcome, BaseException):
            print(f"station {station} failed ❎ ({outcome!r})")

    print(trains)
async def fetch_station(station: str, session: aiohttp.ClientSession, trains: list) -> None:
    """Fetch one station's departure board and collect its train labels.

    Posts a departure-board query for ``station`` to the belgianrail.be
    real-time endpoint, parses the HTML answer and appends every train label
    that is not yet present to ``trains``.

    Args:
        station: Station name, sent as the ``input`` form field.
        session: aiohttp session used to issue the POST request.
        trains: List mutated in place; receives the whitespace-normalised
            labels found on the board, without duplicates.

    Returns:
        None. Results are delivered through ``trains``; a success or failure
        line is printed for the station.
    """
    url = "http://www.belgianrail.be/jp/nmbs-realtime/stboard.exe/en"
    # Sample the clock once so the date and time fields always describe the
    # same instant, even when the call straddles midnight.
    now = datetime.now()
    url_data = {
        'realtime': 'Show',  # purpose unknown; kept as in the original request
        'sqQueryPageDisplayed': 'yes',  # purpose unknown
        'REQProduct_list+': '5:1111111000000000',  # purpose unknown
        'input': station,
        # NOTE(review): this value is hard-coded (it names Mouscron) for every
        # station; the original author did not know its effect and left it
        # untouched.
        'REQ0JourneyStopsSID+':
            'A=1@O=Mouscron@X=3228449@Y=50740997@U=80@L=008885704@B=1@p=1661553118@n=ac.1=GA@',
        'date': now.strftime('%d/%m/%Y'),
        'wDayExtsq': 'Mo|Tu|We|Th|Fr|Sa|Su',  # other values were not tried
        'time': now.strftime('%H:%M'),
        'boardType': 'dep',
        # Author's note: adjustable, but "have to be > 10" (the value used
        # here is 10 -- TODO confirm the actual bound).
        'maxJourneys': '10',
        'start': 'Show',  # purpose unknown
    }

    try:
        async with session.post(url, data=url_data) as resp:
            if resp.status != 200:
                # Previously ignored silently; report it like other failures.
                print(f"station {station} failed ❎ (HTTP {resp.status})")
                return None
            html = await resp.text()
    except (aiohttp.ClientError, asyncio.TimeoutError):
        # ClientError is the public base class that includes
        # ServerDisconnectedError as well as connection and payload errors.
        print(f"station {station} failed ❎ ")
        return None

    print(f"station {station} success ✅")
    soup = BeautifulSoup(html, 'html.parser')
    if soup.body is None:
        return None

    # The first element carrying the "product" class is skipped, as in the
    # original code (presumably a header cell -- TODO confirm).
    for product in soup.body.find_all(class_='product')[1:]:
        link = product.a
        if link is None:
            continue
        # Keep only the text nodes directly inside the link. This ignores the
        # icon <img> (and any other tag) without failing when it is absent.
        raw_label = ''.join(
            child for child in link.contents if isinstance(child, str)
        )
        # Normalise BEFORE the duplicate test: the list stores normalised
        # labels, so testing the raw text would let duplicates through.
        label = re.sub(" +", " ", raw_label.strip())
        if label and label not in trains:
            trains.append(label)
    return None
if __name__ == "__main__":
    # Start the crawl only when the file is executed as a script, so that
    # importing the module has no side effects.
    asyncio.run(main())