ysf/github-subdomains
used_urls = {}
found_subdomains = {}
API_URL = 'https://api.github.com/search/code'
session = http_mksession()
function add_to_db(domain_id, subdomain)
if found_subdomains[subdomain] then return end
found_subdomains[subdomain] = 1
db_add('subdomain', {domain_id=domain_id, value=subdomain})
end
function to_raw_url(html_url)
html_url = str_replace(html_url, 'https://github.com/', 'https://raw.githubusercontent.com/')
return str_replace(html_url, '/blob/', '/')
end
function fetch_user_content(raw_url)
if used_urls[raw_url] then return end
used_urls[raw_url] = 1
debug("requesting raw user content "..raw_url)
local req = http_request(session, 'GET', raw_url, {
headers={authorization='token '..creds['access_key']},
binary=true,
})
local resp = http_fetch(req)
if last_err() then return end
local body = utf8_decode(resp['binary'])
if last_err() then
clear_err()
return nil
end
return body
end
function github_search(domain, sort, order, page)
local req = http_request(session, 'GET', API_URL, {
headers={authorization='token '..creds['access_key']},
query={
type='Code',
q='"'..domain..'"',
per_page='100',
s=sort,
o=order,
page=strval(page)
}
})
local resp = http_fetch(req)
if last_err() then return end
local data = json_decode(resp['text'])
if last_err() then return end
local reset_time = resp['headers']['x-ratelimit-reset']
local remaining = resp['headers']['x-ratelimit-remaining']
local now = time_unix()
local reset_seconds = reset_time - now
ratelimit_throttle('github-search', remaining - 1, reset_seconds*1000)
info({reset_seconds=reset_seconds, reset_time=reset_time, remaining=remaining})
return data['items']
end
function run(domain)
local DOMAIN_REGEX = '(([0-9a-z_\\-\\.]+)\\.' .. str_replace(domain['value'], '.', '\\.') .. ')'
creds = keyring('github')[1]
if not creds then
return 'github api key missing, please login and visit https://github.com/settings/tokens - no permissions required.'
end
local sort_order = {
{ sort='indexed', order='desc' },
{ sort='indexed', order='asc' },
{ sort='', order='desc' },
}
for o = 1, #sort_order do
local page = 0
local sort = sort_order[o]['sort']
local order = sort_order[o]['order']
while true do
page = page + 1
if page > 10 then
break
end
info("requesting page "..page)
local items = github_search(domain['value'], sort, order, page)
if last_err() then return end
if not items or #items == 0 then return end
for i = 1, #items do
local raw_url = to_raw_url(items[i]['html_url'])
local body = fetch_user_content(raw_url)
if last_err() then
warn("skipping "..raw_url.." due to error: "..last_err())
clear_err()
elseif body then
local subdomains = regex_find_all(DOMAIN_REGEX, body)
for j = 1, #subdomains do
add_to_db(domain['id'], subdomains[j][1])
end
if last_err() then return end
end
end
end
end
end