Initial Commit

2025-12-06 09:50:25 +01:00
commit a5f30a65e6
763 changed files with 309904 additions and 0 deletions
--- a/translators/InfoTrac.js
+++ b/translators/InfoTrac.js
@@ -0,0 +1,279 @@
+{
+	"translatorID": "6773a9af-5375-3224-d148-d32793884dec",
+	"label": "InfoTrac",
+	"creator": "Simon Kornblith",
+	"target": "^https?://[^/]+/itw/infomark/",
+	"minVersion": "1.0.0b3.r1",
+	"maxVersion": "",
+	"priority": 250,
+	"inRepository": true,
+	"translatorType": 4,
+	"browserSupport": "gcsibv",
+	"lastUpdated": "2021-12-28 04:42:55"
+}
+
+function detectWeb(doc, url) {
+	
+	// ensure that there is an InfoTrac logo
+	if (!doc.evaluate('//img[substring(@alt, 1, 8) = "InfoTrac"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) return false;
+	
+	if (doc.title.substring(0, 8) == "Article ") {
+		if (ZU.xpathText(doc, '//td//img[contains(@src, "ncnp_logo.gif")]/@title')) return "newspaperArticle";
+		var genre = doc.evaluate('//comment()[substring(., 1, 6) = " Genre"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
+		
+		if (genre) {
+			var value = Zotero.Utilities.trimInternal(genre.nodeValue.substr(7));
+			if (value == "article") {
+				return "journalArticle";
+			} else if (value == "book") {
+				return "book";
+			} else if (value == "dissertation") {
+				return "thesis";
+			} else if (value == "bookitem") {
+				return "bookSection";
+			}
+		}
+		
+		return "magazineArticle";
+	} else if (doc.title.substring(0, 10) == "Citations ") {
+		return "multiple";
+	}
+}
+
+function scrape(doc, url){
+	var newItem = new Zotero.Item();
+	var xpath = '/html/body//comment()';
+	var elmts = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null);
+	var citation = ZU.xpath(doc, '//p/table/tbody//td/table/tbody[not(./script)]');
+	newItem.title = ZU.xpathText(citation, './/font/b');
+	newItem.itemType = "newspaperArticle";
+	var author = ZU.xpathText(citation, './/td/i');
+	if (author) newItem.creators.push(ZU.cleanAuthor(author, "author`"));
+	var date = ZU.xpathText(citation, './/td/text()');
+	if (date) date = date.match(/[A-Z][a-z]+\s\d+,\s\d{4}/);
+	if (date) newItem.date = date[0];
+	var pdfurl = ZU.xpathText(doc, '//blockquote/a[contains(@href, "!pdf")][1]/@href');
+	if  (pdfurl){
+		newItem.attachments.push({url: pdfurl, title: "Infotrac Full Text PDF", mimeType: "application/pdf"})
+	}
+	newItem.attachments.push({document: doc, title: "Infotrac Snapshot", mimeType: "text/html"});
+	while (elmt = elmts.iterateNext()) {
+		var colon = elmt.nodeValue.indexOf(":");
+		var field = elmt.nodeValue.substring(1, colon).toLowerCase();
+		var value = elmt.nodeValue.substring(colon+1, elmt.nodeValue.length-1);
+		if (field == "journal") {
+			newItem.publicationTitle = value;
+		}
+	}
+	if (newItem.publicationTitle.search(/\(.+\)/)){
+		newItem.place = newItem.publicationTitle.match(/\((.+)\)/)[1];
+		newItem.publicationTitle = newItem.publicationTitle.replace(/\(.+\).*/, "");
+	}
+	newItem.complete();
+}
+
+function extractCitation(url, elmts, title, doc) {
+	var newItem = new Zotero.Item();
+	newItem.url = url;
+	if (title) {
+		newItem.title = Zotero.Utilities.superCleanString(title);
+	}
+	newItem.title = ZU.xpathText(citation, './/font/b');
+	newItem.itemType = "newspaperArticle";
+	var date = ZU.xpathText(citation, './/td/text()');
+	if (date) date = date.match(/[A-Z][a-z]+\s\d+,\s\d{4}/);
+	if (date) newItem.date = date[0];
+
+	while (elmt = elmts.iterateNext()) {
+		var colon = elmt.nodeValue.indexOf(":");
+		var field = elmt.nodeValue.substring(1, colon).toLowerCase();
+		var value = elmt.nodeValue.substring(colon+1, elmt.nodeValue.length-1);
+		if (field == "title") {
+			newItem.title = Zotero.Utilities.superCleanString(value);
+		} else if (field == "journal") {
+			newItem.publicationTitle = value;
+		} else if (field == "pi") {
+			parts = value.split(" ");
+			var date = "";
+			var field = null;
+			for (j in parts) {
+				firstChar = parts[j].substring(0, 1);
+				
+				if (firstChar == "v") {
+					newItem.itemType = "journalArticle";
+					field = "volume";
+				} else if (firstChar == "i") {
+					field = "issue";
+				} else if (firstChar == "p") {
+					field = "pages";
+					
+					var pagesRegexp = /p(\w+)\((\w+)\)/;	// weird looking page range
+					var match = pagesRegexp.exec(parts[j]);
+					if (match) {			// yup, it's weird
+						var finalPage = parseInt(match[1])+parseInt(match[2])
+						parts[j] = "p"+match[1]+"-"+finalPage.toString();
+					} else if (!newItem.itemType) {	// no, it's normal
+						// check to see if it's numeric, bc newspaper pages aren't
+						var justPageNumber = parts[j].substr(1);
+						if (parseInt(justPageNumber).toString() != justPageNumber) {
+							newItem.itemType = "newspaperArticle";
+						}
+					}
+				} else if (!field) {	// date parts at the beginning, before
+									// anything else
+					date += " "+parts[j];
+				}
+				
+				if (field) {
+					isDate = false;
+					
+					if (parts[j] != "pNA") {		// make sure it's not an invalid
+												// page number
+						// chop of letter
+						newItem[field] = parts[j].substring(1);
+					} else if (!newItem.itemType) {		// only newspapers are missing
+														// page numbers on infotrac
+						newItem.itemType = "newspaperArticle";
+					}
+				}
+			}
+			
+			// Set type
+			if (!newItem.itemType) {
+				newItem.itemType = "magazineArticle";
+			}
+			
+			if (date != "") {
+				newItem.date = date.substring(1);
+			}
+		} else if (field == "author") {
+			var author = Zotero.Utilities.cleanAuthor(value, "author", true);
+			
+			// ensure author is not already there
+			var add = true;
+			for (var i=0; i<newItem.creators.length; i++) {
+				var existingAuthor = newItem.creators[i];
+				if (existingAuthor.firstName == author.firstName && existingAuthor.lastName == author.lastName) {
+					add = false;
+					break;
+				}
+			}
+			if (add) newItem.creators.push(author);
+		} else if (field == "issue") {
+			newItem.issue = value;
+		} else if (field == "volume") {
+			newItem.volume = value;
+		} else if (field == "issn") {
+			newItem.ISSN = value;
+		} else if (field == "gjd") {
+			var m = value.match(/\(([0-9]{4}[^\)]*)\)(?:, pp\. ([0-9\-]+))?/);
+			if (m) {
+				newItem.date = m[1];
+				newItem.pages = m[2];
+			}
+		} else if (field == "BookTitle") {
+			newItem.publicationTitle = value;
+		} else if (field == "genre") {
+			value = value.toLowerCase();
+			if (value == "article") {
+				newItem.itemType = "journalArticle";
+			} else if (value == "book") {
+				newItem.itemType = "book";
+			} else if (value == "dissertation") {
+				newItem.itemType = "thesis";
+			} else if (value == "bookitem") {
+				newItem.itemType = "bookSection";
+			}
+		}
+	}
+	
+	if (doc) {
+		newItem.attachments.push({document:doc, title:"InfoTrac Snapshot"});
+	} else {
+		newItem.attachments.push({url:url, title:"InfoTrac Snapshot",
+								 mimeType:"text/html"});
+	}
+	
+	newItem.complete();
+}
+
+function doWeb(doc, url) {	
+	var ncnp;
+	if (ZU.xpathText(doc, '//td//img[contains(@src, "ncnp_logo.gif")]/@title')) ncnp = true;
+	/*the only Infotrac Site that's still up & I'm aware of is 19th Century Newspapers. 
+	But there may well be others, so I'm leaving a lot of legacy code in just in case */
+
+	var uri = doc.location.href;
+	if (doc.title.substring(0, 8) == "Article ") {	// article
+		if (ncnp) scrape(doc, url);
+		else {
+			var xpath = '/html/body//comment()';
+			var elmts = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null);
+			extractCitation(uri, elmts);
+		}
+	} else {										// search results
+		var items = new Object();
+		var uris = new Array();
+		var elmts = new Array();
+		
+		var host = doc.location.href.match(/^https?:\/\/[^\/]+/)[0];
+		var baseurl = doc.location.href.match(/(.+)\/purl=/);
+		var institution = url.match(/\?sw_aep=.+/)[0];
+		var tableRows = doc.evaluate('/html/body//table/tbody/tr/td[b or strong]', doc, null,
+									 XPathResult.ANY_TYPE, null);
+		var tableRow;
+		var javaScriptRe = /'([^']*)' *, *'([^']*)'/
+		var i = 0;
+		// Go through table rows
+		if (ncnp){
+			while (tableRow = tableRows.iterateNext()) {
+				var title = ZU.trimInternal(ZU.xpathText(tableRow, './strong'));
+				var link = ZU.xpathText(tableRow, './a[1]/@href');
+				link = link.match(/\(\'(\/.+)\',\'/)[1];
+				link = baseurl[1] + link + institution;
+				//Z.debug(link)
+				items[link] = title;
+			}
+			Zotero.selectItems(items, function (items) {
+			if (!items) {
+				return true;
+			}
+			for (var i in items) {
+
+				uris.push(i);
+			}
+			Zotero.Utilities.processDocuments(uris, scrape)
+		});
+		}
+		else {
+			while (tableRow = tableRows.iterateNext()) {
+				var link = doc.evaluate('./a', tableRow, null, XPathResult.ANY_TYPE, null).iterateNext();
+				var m = javaScriptRe.exec(link.href);
+				if (m) {
+					uris[i] = host+"/itw/infomark/192/215/90714844w6"+m[1]+"?sw_aep=olr_wad"+m[2];
+				}
+				var article = doc.evaluate('./b/text()|./strong/text', link, null, XPathResult.ANY_TYPE, null).iterateNext();
+				items[i] = article.nodeValue;
+				// Chop off final period
+				if (items[i].substr(items[i].length-1) == ".") {
+					items[i] = items[i].substr(0, items[i].length-1);
+				}
+				elmts[i] = doc.evaluate(".//comment()", tableRow, null, XPathResult.ANY_TYPE, null);
+				citation[i] = ZU.xpath(tableRow, '//')
+				i++;
+			}
+			
+			Zotero.selectItems(items, function (items) {
+				if (!items) {
+					return true;
+				}
+				
+				for (var i in items) {
+					extractCitation(uris[i], elmts[i], items[i]);
+				}
+			});
+		}	
+	}
+}/** BEGIN TEST CASES **/
+var testCases = []
+/** END TEST CASES **/