Added iWeb extractor support

master
JordanPlayz158 3 weeks ago
parent fd65ef6646
commit c9a04455fc
  1. 2
      src/lib/collectors.js
  2. 61
      src/lib/utils/body.js
  3. 61
      src/lib/utils/parser/parser.js

@ -68,7 +68,7 @@ function fetchUserFromHTML(username) {
// require down here or have to deal with require loop. require cache will take care of it anyway.
// User -> Timeline -> TimelineEntry -> collectors -/> User
const User = require("./structures/User")
const userData = selectExtractor(text)
const userData = await selectExtractor(text)
const user = new User(userData)
history.report("user", true)
if (constants.caching.db_user_id) {

@ -1,11 +1,14 @@
const constants = require("../constants")
const {Parser} = require("./parser/parser")
const switcher = require("./torswitcher");
function selectExtractor(text) {
if (text.includes("window._sharedData = ")) {
return extractSharedData(text)
} else if (text.includes("PolarisQueryPreloaderCache")) {
return extractPreloader(text)
} else if (text.includes("web_profile_info")) {
return extractIWeb(text)
} else {
throw constants.symbols.extractor_results.NO_SHARED_DATA
}
@ -62,6 +65,64 @@ function extractPreloader(text) {
return JSON.parse(profileInfoResponse.result.response).data.user
}
/**
 * Extract user data from a page that embeds a preloaded `web_profile_info` API request.
 *
 * The page text is expected to contain a JSON `"request":{...}` object that encloses the
 * `web_profile_info` marker and describes the API call through its `url` and `params.query`
 * fields. That object is parsed out of the text, the request is sent to i.instagram.com
 * through the switcher, and `data.user` from the JSON response is returned.
 *
 * Extraction failures are thrown synchronously, before any request is made:
 * - `constants.symbols.extractor_results.AGE_RESTRICTED` if the marker is missing and
 *   `getRestrictedAge` reports a restriction
 * - `constants.symbols.extractor_results.NO_SHARED_DATA` if the marker or the request object
 *   cannot be located, or the request would not go to i.instagram.com
 * - whatever `JSON.parse` throws if the located object is not valid JSON
 *
 * The status callback handed to the switcher throws `ENDPOINT_OVERRIDDEN` (301),
 * `INSTAGRAM_DEMANDS_LOGIN` (302) or `RATE_LIMITED` (429) from `constants.symbols`.
 *
 * @param {string} text
 * @returns {Promise<any>} resolves to `data.user` of the API response
 */
function extractIWeb(text) {
	const apiOrigin = "https://i.instagram.com"
	const parser = new Parser(text)
	const index = parser.seek("web_profile_info\\/\",", {moveToMatch: true, useEnd: true})
	if (index === -1) {
		// Maybe the profile is age restricted?
		const age = getRestrictedAge(text)
		if (age !== null) {
			throw constants.symbols.extractor_results.AGE_RESTRICTED
		}
		throw constants.symbols.extractor_results.NO_SHARED_DATA
	}

	// The marker sits inside the object we want, so look backwards for where that object opens.
	// Change this text to get a different enclosing object, e.g. '"profile":{'
	const enclosingObject = '"request":{'
	if (parser.rewind(enclosingObject, {moveToMatch: true}) === -1) {
		throw constants.symbols.extractor_results.NO_SHARED_DATA
	}
	const endObjectIndex = parser.findClosingCurlyBracket()
	// Step over the `"request":` key so the cursor rests on the opening brace.
	parser.cursor += enclosingObject.length - 1
	if (endObjectIndex <= parser.cursor) {
		// No closing brace was found after the opening one, so there is nothing to parse.
		throw constants.symbols.extractor_results.NO_SHARED_DATA
	}
	const requestData = JSON.parse(parser.slice(endObjectIndex - parser.cursor))

	// requestData.url comes from the page, so resolve it against the API origin and make sure
	// it did not change the host (e.g. "//other.host/" or "@other.host/").
	const url = new URL(requestData.url, apiOrigin)
	if (url.origin !== apiOrigin) {
		throw constants.symbols.extractor_results.NO_SHARED_DATA
	}
	for (const [key, value] of Object.entries(requestData.params.query)) {
		url.searchParams.set(key, value)
	}

	return switcher.request("user_html", url.href, async res => {
		if (res.status === 301) throw constants.symbols.ENDPOINT_OVERRIDDEN
		if (res.status === 302) throw constants.symbols.INSTAGRAM_DEMANDS_LOGIN
		if (res.status === 429) throw constants.symbols.RATE_LIMITED
		return res
	}).then(async g => {
		const json = await g.json()
		return json.data.user
	})
}
/**
* @param {string} text
*/

@ -125,6 +125,12 @@ class Parser {
/**
* Seek to or past the next occurrence of the string.
*
* moveToMatch moves the cursor's current position to the position of the index (if it is not -1)
*
* useEnd adds the length of the text you supplied through toFind in the final index
* (so the text from toFind will be included in the text if you slice from index)
*
* @param {string} toFind
* @param {{moveToMatch?: boolean, useEnd?: boolean}} options both default to false
*/
@ -139,6 +145,61 @@ class Parser {
return index
}
/**
* Seek to or past the next occurance of the string BACKWARDS.
*
* moveToMatch moves the cursors current position to the position of the index (if it is not -1)
*
* @param {string} toFind
* @param {{moveToMatch?: boolean}} options both default to false
*/
rewind(toFind, options = {}) {
if (options.moveToMatch === undefined) options.moveToMatch = false
let index = this.string.lastIndexOf(toFind, this.cursor)
if (index !== -1) {
if (options.moveToMatch) this.cursor = index
}
return index
}
findClosingCurlyBracket() {
let originalPosition = this.cursor
let openCurlyBraceIndex = this.seek("{")
const firstClosedCurlyBraceIndex = this.seek("}")
let openCurlyBraceCount = 0
while(openCurlyBraceIndex < firstClosedCurlyBraceIndex) {
openCurlyBraceIndex = this.seek("{", {moveToMatch: true, useEnd: true})
this.store()
if(openCurlyBraceIndex !== -1 && openCurlyBraceIndex < firstClosedCurlyBraceIndex) {
openCurlyBraceCount++;
} else {
break
}
}
this.cursor = originalPosition
let closingCurlyBraceCount = 0
let closingCurlyBraceIndex = 0
while(openCurlyBraceCount > closingCurlyBraceCount) {
closingCurlyBraceIndex = this.seek("}", {moveToMatch: true, useEnd: true})
this.store()
if(closingCurlyBraceIndex !== -1 && openCurlyBraceCount > closingCurlyBraceCount) {
closingCurlyBraceCount++;
} else {
break
}
}
this.cursor = originalPosition
return closingCurlyBraceIndex;
}
/**
* Replace the current string, adding the old one to the substore.
* @param {string} string

Loading…
Cancel
Save