39 lines
787 B
Python
39 lines
787 B
Python
from random import choice
|
|
from urllib.parse import urlsplit
|
|
|
|
import requests
|
|
|
|
from scrape_ob.agents import USER_AGENTS
|
|
|
|
|
|
def get_agent(url, **kwargs) -> requests.Response:
    """
    GET a URL while presenting a randomly chosen User-Agent header.

    A user agent is faked because the server apparently rejects requests
    that arrive without one.

    Args:
        url: URL to fetch
        **kwargs: forwarded to ``requests.get``. Any ``headers`` given here
            are kept and sent along with the request; a random entry of
            ``USER_AGENTS`` is added as ``User-Agent`` only when the caller
            did not supply a user agent of their own.

    Returns:
        requests.Response
    """
    # Work on a copy so the caller's header mapping is never mutated, and
    # merge instead of overwriting so caller-supplied headers are not lost.
    headers = dict(kwargs.pop("headers", None) or {})

    # HTTP header names are case-insensitive, so look for any spelling of
    # "User-Agent" before adding the fake one.
    if not any(str(name).lower() == "user-agent" for name in headers):
        headers["User-Agent"] = choice(USER_AGENTS)

    return requests.get(url, headers=headers, **kwargs)
|
|
|
|
|
|
def project_name(url: str) -> str:
    """
    Extract the project name from the URL of an OB project page.

    The name is the final segment of the URL's path, ignoring any
    leading or trailing slashes.

    Examples:
        'https://edspace.american.edu/openbehavior/project/ethoscopes/'
        becomes
        'ethoscopes'

    Args:
        url: URL of project page

    Returns:
        str
    """
    # Only the path component matters; scheme, host, query and fragment
    # are discarded by urlsplit.
    trimmed_path = urlsplit(url).path.strip('/')
    # Everything after the last remaining '/' (or the whole string when
    # there is none) is the final path segment.
    _, _, last_segment = trimmed_path.rpartition('/')
    return last_segment