-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhttp_api.py
More file actions
116 lines (100 loc) · 3.26 KB
/
http_api.py
File metadata and controls
116 lines (100 loc) · 3.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import time
import urllib.request
import urllib.error
from socket import timeout
import json
import gzip
import logging
import shutil
import io
from config import PROXY, ERROR_DELAY, DEBUG, DELAY, ERROR_403_DELAY
# Logger named after this module; used by log() and the error handlers below.
logger = logging.getLogger(__name__)

# Headers assigned to the shared opener's ``addheaders`` when a caller asks
# for gzip: they request a gzip-encoded response and identify the client.
HEADERS = [
    ("Accept-Encoding", "gzip"),
    ("User-Agent", "QuantNet (info@quantnet.ai)"),
]
def log(*args):
    """Log all positional arguments as one INFO record.

    Each argument is converted with ``str`` and the pieces are joined with a
    single space before being handed to the module logger.
    """
    message = " ".join(map(str, args))
    logger.info(message)
def load_with_retry(url, use_gzip=True):
    """Fetch *url* and return its body, retrying until a result is available.

    Each attempt first sleeps for ``DELAY`` seconds, installs the shared
    ``opener`` as urllib's global opener, performs the request with a
    10 second timeout, and finally resets the opener's headers and removes
    the global opener again, whether the attempt succeeded or not.

    Args:
        url: Address handed to ``urllib.request.urlopen``.
        use_gzip: When true, ``HEADERS`` (which request gzip encoding) are
            attached to the opener before the request is sent.

    Returns:
        The response body as ``bytes``; it is decompressed first when the
        response carries ``Content-Encoding: gzip``. An HTTP 404 returns the
        empty ``str`` ``''`` (not ``b''``), which is kept for backward
        compatibility.

    Notes:
        There is no retry limit. HTTP 403 is logged as "rate limit" and
        retried after ``ERROR_403_DELAY``; every other failure (other HTTP
        errors, socket timeouts, any other ``Exception``) is logged and
        retried after ``ERROR_DELAY``.
    """
    while True:
        log("request", url)
        try:
            time.sleep(DELAY)
            if use_gzip:
                opener.addheaders = HEADERS
            # NOTE(review): the opener is installed for every request (not only
            # when use_gzip is set) so its proxy/debug handlers always apply and
            # the unconditional uninstall in ``finally`` stays symmetric.
            urllib.request.install_opener(opener)
            # The context manager closes the response (and its socket) even
            # if reading fails part-way through.
            with urllib.request.urlopen(url, timeout=10) as response:
                body = response.read()
                content_encoding = response.headers.get('Content-Encoding')
            if content_encoding == 'gzip':
                body = gzip.decompress(body)
            return body
        except KeyboardInterrupt:
            # Never retry on Ctrl-C; re-raise with the original traceback.
            raise
        except urllib.error.HTTPError as err:
            if err.code == 404:
                # A missing resource is not retried; see Returns above.
                return ''
            if err.code == 403:
                logger.exception("rate limit")
                time.sleep(ERROR_403_DELAY)
            else:
                logger.exception("unexpected")
                time.sleep(ERROR_DELAY)
        except timeout:
            log("timeout")
            time.sleep(ERROR_DELAY)
        except Exception:
            logger.exception("unexpected")
            time.sleep(ERROR_DELAY)
        finally:
            # Leave no per-request state behind on the shared opener.
            opener.addheaders = []
            urllib.request.install_opener(None)
def decode_str(body):
    """Decode *body* to ``str``, trying UTF-8 first and then cp1252.

    Args:
        body: A ``bytes``-like object exposing ``decode``.

    Returns:
        The decoded text.

    Raises:
        UnicodeDecodeError: If the data is neither valid UTF-8 nor decodable
            as cp1252.
    """
    try:
        return body.decode()
    except UnicodeDecodeError:
        # Only a failed UTF-8 decode triggers the fallback; other errors
        # (and KeyboardInterrupt) are no longer swallowed by a bare except.
        return body.decode("cp1252")
def load_file(url, file_name, use_gzip=True):
    """Download *url* into *file_name*, retrying until the download succeeds.

    The shared ``opener`` is installed as urllib's global opener for each
    attempt and removed again afterwards, together with its headers.

    Args:
        url: Address to download with ``urllib.request.urlretrieve``.
        file_name: Destination path (a ``str``; it is concatenated into the
            log message and the temporary file name).
        use_gzip: When true, ``HEADERS`` (which request gzip encoding) are
            sent, and if the response does not report
            ``Content-Encoding: gzip`` the downloaded file is gzip-compressed
            locally via ``file_name + '.tmp'`` and moved back over
            *file_name*.

    Returns:
        None.

    Notes:
        There is no retry limit: any ``Exception`` is logged as
        "wget failed" and the download is retried after ``ERROR_DELAY``.
    """
    while True:
        try:
            log("load file: " + url + " -> " + file_name)
            # Plain truthiness, matching the check below and load_with_retry
            # (the former ``== True`` disagreed for truthy non-bool values).
            if use_gzip:
                opener.addheaders = HEADERS
            # NOTE(review): the opener is installed for every download (not
            # only when use_gzip is set), mirroring the unconditional
            # uninstall in ``finally``.
            urllib.request.install_opener(opener)
            _, response_headers = urllib.request.urlretrieve(url, file_name)
            if use_gzip and response_headers.get('Content-Encoding') != 'gzip':
                # Server sent the payload uncompressed: compress it locally.
                tmp_name = file_name + '.tmp'
                gzip_file(file_name, tmp_name)
                shutil.move(tmp_name, file_name)
            log("done")
            return
        except KeyboardInterrupt:
            # Never retry on Ctrl-C; re-raise with the original traceback.
            raise
        except Exception:
            logger.exception("wget failed")
            time.sleep(ERROR_DELAY)
        finally:
            # Leave no per-request state behind on the shared opener.
            opener.addheaders = []
            urllib.request.install_opener(None)
def gzip_file(ifn, ofn):
    """Write a gzip-compressed copy of the file *ifn* to *ofn*.

    The input is streamed in 1 MiB blocks, so the whole file is never held
    in memory. The input file is left in place.

    Args:
        ifn: Path of the file to read.
        ofn: Path of the gzip file to create (overwritten if it exists).

    Returns:
        None.
    """
    block_size = 1024 * 1024
    with open(ifn, 'rb') as f_in, gzip.open(ofn, 'wb') as f_out:
        # Standard-library chunked copy replaces the manual read/write loop.
        shutil.copyfileobj(f_in, f_out, block_size)
# urllib setup
# Proxy mapping for ProxyHandler: empty when no proxy is configured,
# otherwise the same proxy for both schemes.
if PROXY is None:
    PROXIES = {}
else:
    PROXIES = {
        'http': PROXY,
        'https': PROXY,
    }

# debuglevel passed to the HTTP(S) handlers: 2 when DEBUG is set, else 0.
if DEBUG:
    debug = 2
else:
    debug = 0

https_handler = urllib.request.HTTPSHandler(debuglevel=debug)
http_handler = urllib.request.HTTPHandler(debuglevel=debug)
proxy_handler = urllib.request.ProxyHandler(PROXIES)
proxy_auth_handler = urllib.request.ProxyBasicAuthHandler()

# Shared opener that load_with_retry and load_file install per request.
opener = urllib.request.build_opener(
    proxy_handler,
    proxy_auth_handler,
    http_handler,
    https_handler,
)