Chapter 7 Appendix

7.1 Data Scraping Functions

7.1.1 OUA Web Scraper

import re
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from urllib.request import urlopen as uReq
import csv
from collections import defaultdict
#import pprint
import csv

def get_links():
    """Collect the team-stats URL of every box score linked from the OUA
    men's basketball game logs.

    For each season in ``years`` and each team in ``teams`` the team's
    game-log page is downloaded.  Among the page's tables whose first link
    contains ``/boxscores/20`` the one with the most children is chosen (the
    first table is used when none qualifies), and every link in it is turned
    into an absolute URL with ``?view=teamstats`` appended.

    Returns:
        list[str]: The collected URLs in season/team/page order.  No
        de-duplication is performed.
    """
    print("getting links...")
    teams = ['algoma', 'brock', 'carleton', 'guelph', 'lakehead', 'laurentian',
             'laurier', 'mcmaster', 'nipissing', 'ottawa', 'queens', 'ryerson',
             'toronto', 'waterloo', 'western', 'windsor', 'york']
    years = ['2014-15', '2015-16', '2016-17', '2017-18', '2018-19']
    original_url = 'http://oua.ca/sports/mbkb/'
    end_url = '?view=gamelog'
    href_list = []
    for year in years:
        season_url = original_url + year
        for team in teams:
            current_url = season_url + '/teams/' + team + end_url
            r = requests.get(current_url)
            soup = BeautifulSoup(r.content, 'html.parser')
            tables = soup.findAll('table')
            if not tables:
                # Nothing to index into on this page; move on instead of
                # failing on tables[0].
                continue

            max_len = 0
            index = 0
            for i, candidate in enumerate(tables):
                tags = candidate.findAll('a')
                if not tags:
                    continue
                # An anchor without href yields None; treat it as no match.
                url = tags[0].get('href') or ''
                if "/boxscores/20" in url and len(candidate) > max_len:
                    index = i
                    max_len = len(candidate)

            for tag in tables[index].findAll('a'):
                href = tag.get('href')
                if href is None:
                    continue
                # The links are relative ("../boxscores/..."); swap the ".."
                # for the season's base URL.
                url = re.sub(r"\.\.", season_url, href)
                href_list.append(url + '?view=teamstats')

    print("done getting links")
    return href_list


def scrape(url):
    """Build one record of team statistics per OUA box-score page.

    Despite its name, ``url`` is a sequence of page URLs.  Some games carry
    extra fields in their statistics table; this function targets the pages
    without those extra fields and simply leaves out any value whose cell is
    missing.

    Args:
        url: Sequence of box-score page URLs.

    Returns:
        dict: Maps each URL to a dict holding the "Away"/"Home" team names,
        the "Winner"/"Loser" names with their per-quarter and total points,
        and one "<stat> Away"/"<stat> Home" entry per statistics row.

    Raises:
        IndexError: If a page has no ``teams clearfix`` block, has neither a
            captioned statistics table nor a ninth table, or that table has
            fewer than three column headers.
    """
    # Exact markup of the caption that identifies the team-statistics table.
    team_stats_caption = ('<caption class="caption offscreen">'
                          '<h2>Team Statistics</h2></caption>')
    periods = ("1st Qtr", "2nd Qtr", "3rd Qtr", "4th Qtr", "Total")

    dictlist = {}
    for link in url:
        print(link)
        r = requests.get(link)
        soup = BeautifulSoup(r.content, 'html.parser')
        stats = soup.findAll("table")
        scores = soup.findAll('div', {'class': 'teams clearfix'})[0].table

        # Pages differ in how many tables they contain, so look the table up
        # by caption first and only fall back to position 8 when no caption
        # matches.
        table = None
        for candidate in stats[2:]:
            if str(candidate.caption) == team_stats_caption:
                table = candidate
                break
        if table is None:
            table = stats[8]

        column_headers = table.findAll('th', {'scope': 'col'})
        record = {
            "Away": column_headers[1].text.strip(),
            "Home": column_headers[2].text.strip(),
        }

        # Look both score rows up afresh for every page so a missing row
        # can never pick up the previous page's values.
        sides = []
        for label, css_class in (("Winner", 'winner'), ("Loser", 'loser')):
            rows = scores.findAll('tr', {'class': css_class})
            first = rows[0] if rows else None
            cells = first.findAll('td') if first is not None else []
            sides.append((label, first, cells))

        for label, row, _ in sides:
            if row is not None and row.th is not None:
                record[label] = row.th.text.strip()
        # Cells 0-3 hold the quarters, cell 4 the total.
        for position, period in enumerate(periods):
            for label, _, cells in sides:
                if position < len(cells):
                    key = label + " " + period + " Pts"
                    record[key] = cells[position].text.strip()

        # Statistics rows: row j's away value is cell 2j, home value 2j + 1.
        row_headers = [th.text.strip()
                       for th in table.findAll('th', {'scope': 'row'})]
        values = [td.text.strip() for td in table.findAll('td')]
        for j, stat in enumerate(row_headers[:16]):
            if 2 * j < len(values):
                record[stat + ' Away'] = values[2 * j]
            if 2 * j + 1 < len(values):
                record[stat + ' Home'] = values[2 * j + 1]
        # Only an ' Away' entry is recorded for the 17th row, from cell 32.
        if len(row_headers) > 16 and len(values) > 32:
            record[row_headers[16] + ' Away'] = values[32]

        dictlist[link] = record

    return dictlist


if __name__ == '__main__':
    q = get_links()
    a = scrape(q)
    # scrape() returns {url: {field: value}}; transpose to one row per game.
    df = pd.DataFrame(a).T
    # Every hyphen in the scraped values is rewritten as ' -- '.
    df = df.replace(r'\-', ' -- ', regex=True).astype(object)
    # Fix the column order of the output file: away stats, home stats, then
    # the winner/loser scoring columns.
    columns = [
        'Away', 'FG Away', 'FG% Away', '3PT FG Away',
        '3PT FG% Away', 'FT Away', 'FT% Away', 'Rebounds Away',
        'Assists Away',
        'Turnovers Away', 'Points Off Turnovers Away',
        '2nd Chance Points Away', 'Points in the Paint Away',
        'Fastbreak Points Away', 'Bench Points Away',
        'Largest Lead Away', 'Time of Largest Lead Away',
        'Home', 'FG Home',
        'FG% Home', '3PT FG Home', '3PT FG% Home',
        'FT Home', 'FT% Home', 'Rebounds Home', 'Assists Home',
        'Turnovers Home', 'Points Off Turnovers Home',
        '2nd Chance Points Home', 'Points in the Paint Home',
        'Fastbreak Points Home',
        'Bench Points Home', 'Largest Lead Home',
        'Time of Largest Lead Home', 'Trends Away', 'Winner',
        'Winner 1st Qtr Pts',
        'Winner 2nd Qtr Pts', 'Winner 3rd Qtr Pts',
        'Winner 4th Qtr Pts', 'Winner Total Pts', 'Loser',
        'Loser 1st Qtr Pts', 'Loser 2nd Qtr Pts',
        'Loser 3rd Qtr Pts', 'Loser 4th Qtr Pts', 'Loser Total Pts',
    ]
    df = df[columns]

    df.to_csv('gbyg.csv', header=True)

7.1.2 OUA Player Stats Scraper

import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from urllib.request import urlopen as uReq
import re
import itertools

def get_links():
    """Collect the team-stats URL of every box score linked from the OUA
    men's basketball game logs.

    For each season in ``years`` and each team in ``teams`` the team's
    game-log page is downloaded.  Among the page's tables whose first link
    contains ``/boxscores/20`` the one with the most children is chosen (the
    first table is used when none qualifies), and every link in it is turned
    into an absolute URL with ``?view=teamstats`` appended.

    Returns:
        list[str]: The collected URLs in season/team/page order.  No
        de-duplication is performed.
    """
    print("getting links...")
    teams = ['algoma', 'brock', 'carleton', 'guelph',
             'lakehead', 'laurentian',
             'laurier', 'mcmaster', 'nipissing',
             'ottawa', 'queens', 'ryerson',
             'toronto', 'waterloo', 'western', 'windsor', 'york']
    years = ['2014-15', '2015-16', '2016-17', '2017-18', '2018-19']
    original_url = 'http://oua.ca/sports/mbkb/'
    end_url = '?view=gamelog'
    href_list = []
    for year in years:
        season_url = original_url + year
        for team in teams:
            current_url = season_url + '/teams/' + team + end_url
            r = requests.get(current_url)
            soup = BeautifulSoup(r.content, 'html.parser')
            tables = soup.findAll('table')
            if not tables:
                # Nothing to index into on this page; move on instead of
                # failing on tables[0].
                continue

            max_len = 0
            index = 0
            for i, candidate in enumerate(tables):
                tags = candidate.findAll('a')
                if not tags:
                    continue
                # An anchor without href yields None; treat it as no match.
                url = tags[0].get('href') or ''
                if "/boxscores/20" in url and len(candidate) > max_len:
                    index = i
                    max_len = len(candidate)

            for tag in tables[index].findAll('a'):
                href = tag.get('href')
                if href is None:
                    continue
                # The links are relative ("../boxscores/..."); swap the ".."
                # for the season's base URL.
                url = re.sub(r"\.\.", season_url, href)
                href_list.append(url + '?view=teamstats')

    print("done getting links")
    return href_list

def vsplayers_scrape(url):
    """Scrape the visiting team's starters from each box-score page.

    Args:
        url: Sequence of box-score page URLs.

    Returns:
        dict: For every URL an empty placeholder entry keyed by the URL
        itself, plus one entry per starter keyed by ``(url, i)``.  A starter
        entry maps 'Away' to the lineup table's caption text, the table's
        first header to the player's name cell, and the lineup's 2nd-14th
        ``th`` texts to that player's 13 stat cells.  Starters are only
        recorded when the STARTERS section has more than four rows.

    Raises:
        IndexError: If the page lacks the expected box-score markup or a
            player row has fewer than 13 cells.
    """
    vslist = {}
    for link in url:
        print(link)
        r = requests.get(link)
        soup = BeautifulSoup(r.content, 'html.parser')

        boxscore = soup.find_all(
            'article', {'class': 'game-boxscore bkb clearfix'})
        players = boxscore[0].find_all('div', {'class': 'player-stats'})
        team1 = players[0].find_all('div', {'class': 'stats-wrap clearfix'})
        visitorteam = team1[0].find_all(
            'div', {'class': 'stats-box full lineup visitor clearfix'})
        lineup = visitorteam[0]

        # Placeholder entry for the game itself (part of the returned shape).
        vslist[link] = {}

        # Reset per page so a page without a STARTERS section cannot reuse
        # the previous page's rows.
        starters = None
        for tbody in lineup.find_all('tbody'):
            if tbody.tr is not None and tbody.tr.text.strip() == 'STARTERS':
                starters = tbody
        if starters is None:
            continue

        starterstr = starters.find_all('tr')
        # names[0] and starterstr[0] belong to the section header row, so
        # player i lives at index i + 1 in both.
        names = starters.find_all('th')
        if len(starterstr) > 4 and len(names) > 1:
            team_name = lineup.caption.text.strip()
            name_header = lineup.thead.th.text.strip()
            headers = lineup.find_all('th')
            for i in range(len(names) - 1):
                cells = starterstr[i + 1].find_all('td')
                record = {
                    'Away': team_name,
                    name_header: names[i + 1].text.strip(),
                }
                # Header k labels the player's (k - 1)-th stat cell.
                for k in range(1, 14):
                    record[headers[k].text.strip()] = cells[k - 1].text.strip()
                vslist[link, i] = record

    return vslist

def vrplayers_scrape(url):
    """Scrape the visiting team's reserves from each box-score page.

    Args:
        url: Sequence of box-score page URLs.

    Returns:
        dict: For every URL an empty placeholder entry keyed by the URL
        itself, plus one entry per reserve keyed by ``(url, i)``.  A reserve
        entry maps 'Away' to the lineup table's caption text, the table's
        first header to the player's name cell, and the lineup's 2nd-14th
        ``th`` texts to that player's 13 stat cells.

    Raises:
        IndexError: If the page lacks the expected box-score markup or a
            player row has fewer than 13 cells.
    """
    vrlist = {}
    for link in url:
        print(link)
        r = requests.get(link)
        soup = BeautifulSoup(r.content, 'html.parser')

        boxscore = soup.find_all(
            'article', {'class': 'game-boxscore bkb clearfix'})
        players = boxscore[0].find_all('div', {'class': 'player-stats'})
        team1 = players[0].find_all('div', {'class': 'stats-wrap clearfix'})
        visitorteam = team1[0].find_all(
            'div', {'class': 'stats-box full lineup visitor clearfix'})
        lineup = visitorteam[0]

        # Placeholder entry for the game itself (part of the returned shape).
        vrlist[link] = {}

        # Reset per page so a page without a RESERVES section cannot reuse
        # the previous page's rows.
        reserves = None
        for tbody in lineup.find_all('tbody'):
            if tbody.tr is not None and tbody.tr.text.strip() == 'RESERVES':
                reserves = tbody
        if reserves is None:
            continue

        reservestr = reserves.find_all('tr')
        # names[0] and reservestr[0] belong to the section header row, so
        # player i lives at index i + 1 in both.
        names = reserves.find_all('th')
        if len(reservestr) > 0 and len(names) > 1:
            team_name = lineup.caption.text.strip()
            name_header = lineup.thead.th.text.strip()
            headers = lineup.find_all('th')
            for i in range(len(names) - 1):
                cells = reservestr[i + 1].find_all('td')
                record = {
                    'Away': team_name,
                    name_header: names[i + 1].text.strip(),
                }
                # Header k labels the player's (k - 1)-th stat cell.
                for k in range(1, 14):
                    record[headers[k].text.strip()] = cells[k - 1].text.strip()
                vrlist[link, i] = record

    return vrlist





def hsplayers_scrape(url):
    """Scrape the home team's starters from each box-score page.

    Args:
        url: Sequence of box-score page URLs.

    Returns:
        dict: For every URL an empty placeholder entry keyed by the URL
        itself, plus one entry per starter keyed by ``(url, i)``.  A starter
        entry maps 'Home' to the lineup table's caption text, the table's
        first header to the player's name cell, and the lineup's 2nd-14th
        ``th`` texts to that player's 13 stat cells.  Starters are only
        recorded when the STARTERS section has more than four rows.

    Raises:
        IndexError: If the page lacks the expected box-score markup or a
            player row has fewer than 13 cells.
    """
    slist = {}
    for link in url:
        print(link)
        r = requests.get(link)
        soup = BeautifulSoup(r.content, 'html.parser')

        boxscore = soup.find_all(
            'article', {'class': 'game-boxscore bkb clearfix'})
        players = boxscore[0].find_all('div', {'class': 'player-stats'})
        # The home lineup sits in the second stats-wrap block.
        team2 = players[0].find_all(
            'div', {'class': 'stats-wrap clearfix'})[1]
        hometeam = team2.find_all(
            'div', {'class': 'stats-box full lineup home clearfix'})
        lineup = hometeam[0]

        # Placeholder entry for the game itself (part of the returned shape).
        slist[link] = {}

        # Reset per page so a page without a STARTERS section cannot reuse
        # the previous page's rows.
        starters = None
        for tbody in lineup.find_all('tbody'):
            if tbody.tr is not None and tbody.tr.text.strip() == 'STARTERS':
                starters = tbody
        if starters is None:
            continue

        starterstr = starters.find_all('tr')
        # names[0] and starterstr[0] belong to the section header row, so
        # player i lives at index i + 1 in both.
        names = starters.find_all('th')
        if len(starterstr) > 4 and len(names) > 1:
            team_name = lineup.caption.text.strip()
            name_header = lineup.thead.th.text.strip()
            headers = lineup.find_all('th')
            for i in range(len(names) - 1):
                cells = starterstr[i + 1].find_all('td')
                record = {
                    'Home': team_name,
                    name_header: names[i + 1].text.strip(),
                }
                # Header k labels the player's (k - 1)-th stat cell.
                for k in range(1, 14):
                    record[headers[k].text.strip()] = cells[k - 1].text.strip()
                slist[link, i] = record

    return slist

def hrplayers_scrape(url):
    """Scrape the home team's reserves from each box-score page.

    Args:
        url: Sequence of box-score page URLs.

    Returns:
        dict: For every URL an empty placeholder entry keyed by the URL
        itself, plus one entry per reserve keyed by ``(url, i)``.  A reserve
        entry maps 'Home' to the lineup table's caption text, the table's
        first header to the player's name cell, and the lineup's 2nd-14th
        ``th`` texts to that player's 13 stat cells.

    Raises:
        IndexError: If the page lacks the expected box-score markup or a
            player row has fewer than 13 cells.
    """
    rlist = {}
    for link in url:
        print(link)
        r = requests.get(link)
        soup = BeautifulSoup(r.content, 'html.parser')

        boxscore = soup.find_all(
            'article', {'class': 'game-boxscore bkb clearfix'})
        players = boxscore[0].find_all('div', {'class': 'player-stats'})
        # The home lineup sits in the second stats-wrap block.
        team2 = players[0].find_all(
            'div', {'class': 'stats-wrap clearfix'})[1]
        hometeam = team2.find_all(
            'div', {'class': 'stats-box full lineup home clearfix'})
        lineup = hometeam[0]

        # Placeholder entry for the game itself (part of the returned shape).
        rlist[link] = {}

        # Reset per page so a page without a RESERVES section cannot reuse
        # the previous page's rows.
        reserves = None
        for tbody in lineup.find_all('tbody'):
            if tbody.tr is not None and tbody.tr.text.strip() == 'RESERVES':
                reserves = tbody
        if reserves is None:
            continue

        reservestr = reserves.find_all('tr')
        # names[0] and reservestr[0] belong to the section header row, so
        # player i lives at index i + 1 in both.
        names = reserves.find_all('th')
        if len(reservestr) > 0 and len(names) > 1:
            team_name = lineup.caption.text.strip()
            name_header = lineup.thead.th.text.strip()
            headers = lineup.find_all('th')
            for i in range(len(names) - 1):
                cells = reservestr[i + 1].find_all('td')
                record = {
                    'Home': team_name,
                    name_header: names[i + 1].text.strip(),
                }
                # Header k labels the player's (k - 1)-th stat cell.
                for k in range(1, 14):
                    record[headers[k].text.strip()] = cells[k - 1].text.strip()
                rlist[link, i] = record

    return rlist

# Scrape all four player groups from the same list of box-score URLs.
q = get_links()
b = hrplayers_scrape(q)
c = hsplayers_scrape(q)
d = vsplayers_scrape(q)
e = vrplayers_scrape(q)


# Each scraper returns {key: {column: value}}; transposing gives one row per
# key.  Hyphens are rewritten as ' -- ' and newlines are removed from every
# value before the group is written to its own CSV file.
df1 = pd.DataFrame(b)
df1 = df1.T
df1 = df1.replace(r'\-', ' -- ', regex=True).astype(object)
df1 = df1.replace(r'\n', '', regex=True).astype(object)
df1.to_csv('home reserves.csv', header=True)

df2 = pd.DataFrame(c)
df2 = df2.T
df2 = df2.replace(r'\-', ' -- ', regex=True).astype(object)
df2 = df2.replace(r'\n', '', regex=True).astype(object)
df2.to_csv('home starters.csv', header=True)

df3 = pd.DataFrame(d)
df3 = df3.T
df3 = df3.replace(r'\-', ' -- ', regex=True).astype(object)
df3 = df3.replace(r'\n', '', regex=True).astype(object)
df3.to_csv('visitors starters.csv', header=True)

df4 = pd.DataFrame(e)
df4 = df4.T
df4 = df4.replace(r'\-', ' -- ', regex=True).astype(object)
df4 = df4.replace(r'\n', '', regex=True).astype(object)
df4.to_csv('visitors reserves.csv', header=True)


# Combined file: home starters, visitor starters, home reserves, visitor
# reserves, in that order.
concat = pd.concat([df2, df3, df1, df4], sort=False)

concat.to_csv('player_data.csv', header=True)

7.1.3 Synergy Data Scraper

from bs4 import BeautifulSoup
from selenium import webdriver
import pandas as pd
import time
import os
import sys


# Shared Chrome session used by every function below.  The chromedriver
# binary is looked up under sys.path[0] (normally the directory of the
# script being run).
_chromedriver = os.path.join(sys.path[0], 'chromedriver')
browser = webdriver.Chrome(_chromedriver)

def login():
    """Open the Synergy login page, fill in the credentials and submit.

    Works on the module-level ``browser`` session and returns nothing.
    """
    browser.get('https://www.synergysportstech.com/Synergy/Default.aspx')

    # (CSS selector of the input field, text typed into it)
    form_fields = (
        ('#txtUserName', "************"),
        ('#txtPassword', "************"),
    )
    for selector, value in form_fields:
        browser.find_element_by_css_selector(selector).send_keys(value)

    browser.find_element_by_css_selector('#btnLogin').click()


def _select_option(css_selector, label):
    """Click the first option under ``css_selector`` whose text is ``label``."""
    dropdown = browser.find_element_by_css_selector(css_selector)
    for option in dropdown.find_elements_by_tag_name('option'):
        if option.text == label:
            option.click()  # select() in earlier versions of webdriver
            break


def get_links(season='2014 - 2015'):
    """Return the game-page URLs listed on Synergy's game-selection page.

    The filters are set to the given season, 'U Sports', a view size of
    '1600', 'Regular Season' and 'Ontario University Athletics', after which
    every link in the page's third table whose href contains ``GameGrid2`` is
    collected.

    Args:
        season: Text of the season option to select, exactly as shown in the
            season dropdown.

    Returns:
        list[str]: Absolute URLs of the matching game pages.
    """
    browser.get('https://www.synergysportstech.com/Synergy/Sport/Basketball/web/teamsst/Video/SelectGame2.aspx')

    # The fixed pauses presumably give the page time to refresh the dependent
    # dropdowns after a selection -- TODO confirm the required delays.
    _select_option('#ctl00_MainContent_lstSeason', season)
    time.sleep(3)
    _select_option('#ctl00_MainContent_lstDivisionGroup', 'U Sports')
    _select_option('#ctl00_MainContent_lstViewMax', '1600')
    _select_option('#ctl00_MainContent_lstSubType', 'Regular Season')
    time.sleep(5)
    _select_option('#ctl00_MainContent_lstDivisions',
                   'Ontario University Athletics')
    time.sleep(5)

    links = browser.find_elements_by_tag_name('table')
    html = links[2].get_attribute('innerHTML')
    soup1 = BeautifulSoup(html, 'html.parser')
    root_url = 'https://www.synergysportstech.com/Synergy/Sport/Basketball/web/teamsst/Video/'
    # Anchors without an href are skipped rather than raising KeyError.
    return [root_url + link['href']
            for link in soup1.find_all('a')
            if "GameGrid2" in link.get('href', '')]

def scrape(url_list):
    """Collect the 'Game Breakdown' figures of every game in ``url_list``.

    Each game page is opened in the shared ``browser``, its 'Game Breakdown'
    link is clicked, and after a five-second pause the first and third
    elements of class ``Tier`` are parsed: the first supplies team names and
    total scores, the third the ``TierRow`` rows.

    Args:
        url_list: Iterable of game-page URLs.

    Returns:
        dict: ``{url: {team: {row name: value, 'Total Points': score}}}``.
        The team with the higher total additionally gets ``'Winner': 1``.
        'Total Points' and 'Winner' are written while the rows are processed,
        so a game without any ``TierRow`` row yields two empty team dicts.

    Raises:
        ValueError: If a total score is not an integer string.
    """
    games = {}
    for url in url_list:
        games[url] = {}
        browser.get(url)
        browser.find_element_by_link_text('Game Breakdown').click()
        time.sleep(5)

        table = browser.find_elements_by_class_name('Tier')
        raw_html = table[2].get_attribute('innerHTML')
        soup = BeautifulSoup(raw_html, 'html.parser')
        raw_html2 = table[0].get_attribute('innerHTML')
        soup2 = BeautifulSoup(raw_html2, 'html.parser')
        print(soup.tr)

        # Rows 1 and 2 of the first Tier element: away team, then home team.
        tr = soup2.find_all('tr')
        away_team = tr[1].td.text.strip()
        away_total = tr[1].find_all('td')[1].text.strip()
        home_team = tr[2].td.text.strip()
        home_total = tr[2].find_all('td')[1].text.strip()

        games[url][home_team] = {}
        games[url][away_team] = {}
        home_stats = games[url][home_team]
        away_stats = games[url][away_team]

        for row in soup.find_all('tr', {'class': 'TierRow'}):
            cells = row.find_all('td')
            rowname = cells[0].text.strip()
            home_stats[rowname] = cells[1].text.strip()
            home_stats['Total Points'] = home_total
            away_stats[rowname] = cells[2].text.strip()
            away_stats['Total Points'] = away_total
            # Compare numerically: as strings "100" would sort below "99".
            if int(home_total) > int(away_total):
                home_stats['Winner'] = 1
            elif int(away_total) > int(home_total):
                away_stats['Winner'] = 1

    return games

if __name__ == '__main__':
    login()
    urls = get_links()
    results = scrape(urls)

    # Flatten {url: {team: stats}} into one column per (url, team) pair,
    # then transpose so that each pair becomes a row.
    df = pd.DataFrame.from_dict({(game, team): stats
                                 for game, teams in results.items()
                                 for team, stats in teams.items()})
    df = df.T
    # fillna returns a new frame, so keep the result and do it before the
    # file is written; missing values (e.g. 'Winner' for the losing team)
    # become 0 in the CSV.
    df = df.fillna(0)
    df.to_csv('General2014-15 .csv')