import logging
from urllib.parse import urlparse

import requests

from .errors import ActivityNotFoundError
from .urlutils import check_url

logger = logging.getLogger(__name__)


class ObjectService(object):
    """Resolve objects by ID, backed by a cache collection.

    Lookup order for an object that is not cached (or when a reload is
    forced): the inbox collection, then the outbox collection, then an
    HTTP GET on the object ID itself.

    The collection arguments only need to provide the methods used here
    (``find_one``, ``update`` with ``upsert=True``, ``insert``).
    NOTE(review): these look like legacy pymongo ``Collection`` calls;
    ``update``/``insert`` no longer exist in pymongo 4 -- confirm the driver
    version before upgrading.
    """

    # Seconds to wait for a remote server before giving up. Without a
    # timeout ``requests`` waits forever on a stalled connection.
    REQUEST_TIMEOUT = 15

    def __init__(self, user_agent, col, inbox, outbox, instances):
        """Store the collaborators.

        Args:
            user_agent: Value sent as the ``User-Agent`` header on remote fetches.
            col: Cache collection, keyed by ``object_id``.
            inbox: Collection searched first for a locally known object.
            outbox: Collection searched second for a locally known object.
            instances: Collection recording each instance (scheme + host) seen.
        """
        self._user_agent = user_agent
        self._col = col
        self._inbox = inbox
        self._outbox = outbox
        self._instances = instances
        # In-process memo so the instances collection is queried at most once
        # per instance for the lifetime of this service.
        self._known_instances = set()

    def _fetch_remote(self, object_id):
        """Fetch ``object_id`` over HTTP and return the decoded JSON body.

        Raises:
            ActivityNotFoundError: The server answered 404.
            requests.HTTPError: Any other non-success status.
            requests.Timeout: No answer within ``REQUEST_TIMEOUT`` seconds.
        """
        logger.debug('fetch remote %s', object_id)
        # Validate the URL before issuing any request to it.
        check_url(object_id)
        resp = requests.get(
            object_id,
            headers={
                'Accept': 'application/activity+json',
                'User-Agent': self._user_agent,
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        if resp.status_code == 404:
            raise ActivityNotFoundError(f'{object_id} cannot be fetched, 404 error not found')
        resp.raise_for_status()
        return resp.json()

    def _record_instance(self, object_id):
        """Remember the instance ``object_id`` belongs to, once per instance."""
        # ``params`` must be cleared too: urlparse splits ``;params`` off the
        # last path segment, and leaving it set would leak into the result
        # (``https://host/;v=1`` instead of ``https://host``).
        instance = urlparse(object_id)._replace(
            path='', params='', query='', fragment='',
        ).geturl()
        if instance in self._known_instances:
            return
        self._known_instances.add(instance)
        if not self._instances.find_one({'instance': instance}):
            self._instances.insert({'instance': instance, 'first_object': object_id})

    @staticmethod
    def _extract(doc, object_id):
        """Return the part of a stored box document that ``object_id`` names.

        The document matched either on its own ``remote_id`` (the activity is
        the answer) or as a ``Create`` wrapping the object (the embedded
        object is the answer).
        """
        # ``.get`` so a document matched only via ``activity.object.id`` and
        # lacking ``remote_id`` does not raise KeyError.
        if doc.get('remote_id') == object_id:
            return doc['activity']
        return doc['activity']['object']

    def _fetch(self, object_id):
        """Resolve ``object_id`` from the inbox, the outbox, or the network."""
        self._record_instance(object_id)

        query = {'$or': [
            {'remote_id': object_id},
            {'type': 'Create', 'activity.object.id': object_id},
        ]}
        # Prefer locally stored copies; inbox is consulted before outbox.
        for box in (self._inbox, self._outbox):
            doc = box.find_one(query)
            if doc:
                return self._extract(doc, object_id)

        return self._fetch_remote(object_id)

    def _store(self, object_id, obj, part_of_stream, announce_published):
        """Upsert ``obj`` and its metadata into the cache collection."""
        self._col.update(
            {'object_id': object_id},
            {'$set': {
                'cached_object': obj,
                'meta.part_of_stream': part_of_stream,
                'meta.announce_published': announce_published,
            }},
            upsert=True,
        )

    def get(self, object_id, reload_cache=False, part_of_stream=False, announce_published=None):
        """Return the object identified by ``object_id``.

        Args:
            object_id: ID (URL) of the object to resolve.
            reload_cache: When true, skip the cache lookup, resolve the
                object again and overwrite the cache entry.
            part_of_stream: Stored as ``meta.part_of_stream`` whenever the
                cache entry is written.
            announce_published: Stored as ``meta.announce_published``
                whenever the cache entry is written.

        Returns:
            The cached object on a cache hit, otherwise the freshly resolved
            object. On a cache hit the stored metadata is left untouched.

        Raises:
            ActivityNotFoundError: The object had to be fetched remotely and
                the server answered 404.
        """
        if not reload_cache:
            cached = self._col.find_one({'object_id': object_id})
            if cached:
                logger.debug('ObjectService: %s', cached)
                return cached['cached_object']

        obj = self._fetch(object_id)
        self._store(object_id, obj, part_of_stream, announce_published)
        return obj