kpcyrd/instagram
PAGE_SIZE = 50
function get_shared_data(html)
local s = html_select_list(html, 'script')
for i=1, #s do
local m = regex_find('^window\\._sharedData = (.+);$', s[i]['text'])
if m then
return json_decode(m[2])
end
end
end
function sign_request(rhx, json_params)
local magic = rhx .. ':' .. json_params
local x_instagram_gis = hex(md5(magic))
return x_instagram_gis
end
function download_image(node)
local url = node['display_url']
debug(url)
local req = http_request(session, 'GET', url, {
into_blob=true,
})
local r = http_send(req)
if last_err() then return end
if r['status'] ~= 200 then return 'http error: ' .. r['status'] end
db_add('image', {
value=r['blob'],
})
end
function pull_graphql(page)
local end_cursor = page['page_info']['end_cursor']
for i=1, #page['edges'] do
local node = page['edges'][i]['node']
node['thumbnail_resources'] = nil
node['media_preview'] = nil
local err = download_image(node)
if last_err() then return end
if err ~= nil then return err end
todo_posts = todo_posts -1
debug('posts left: ' .. todo_posts .. '/' .. total_posts)
end
if page['page_info']['has_next_page'] then
debug('requesting next page=' .. end_cursor)
variables = json_encode({
id=user['id'],
first=PAGE_SIZE,
after=end_cursor
})
local headers = {}
headers['X-Instagram-GIS'] = sign_request(rhx_gis, variables)
local req = http_request(session, 'GET', 'https://www.instagram.com/graphql/query/', {
query={
query_hash='42323d64886122307be10013ad2dcc44',
variables=variables,
},
headers=headers,
})
r = http_send(req)
if last_err() then return end
if r['status'] ~= 200 then return 'http error: ' .. r['status'] end
x = json_decode(r['text'])
if last_err() then return end
return pull_graphql(x['data']['user']['edge_owner_to_timeline_media'])
end
end
function run(arg)
session = http_mksession()
local url = 'https://www.instagram.com/' .. arg['username'] .. '/'
local req = http_request(session, 'GET', url, {})
local resp = http_send(req)
if last_err() then return end
if resp['status'] ~= 200 then return 'invalid status code' end
local html = resp['text']
local ld = html_select(html, 'script[type="application/ld+json"]')
if last_err() then return end
local ld = json_decode(ld['text'])
if last_err() then return end
if ld['email'] then
db_add('email', {
value=ld['email'],
})
end
db_update('account', arg, {
displayname=ld['name'],
email=ld['email'],
url=url,
})
local sd = get_shared_data(html)
if last_err() then return end
rhx_gis = sd['rhx_gis']
user = sd['entry_data']['ProfilePage'][1]['graphql']['user']
local page = user['edge_owner_to_timeline_media']
total_posts = page['count']
todo_posts = total_posts
return pull_graphql(page)
end