Initial Commit

2025-12-06 09:50:25 +01:00
commit a5f30a65e6
763 changed files with 309904 additions and 0 deletions
--- a/translators/LingBuzz.js
+++ b/translators/LingBuzz.js
@@ -0,0 +1,359 @@
+{
+	"translatorID": "e048e70e-8fea-43e9-ac8e-940bc3d71b0b",
+	"label": "LingBuzz",
+	"creator": "Göktuğ Kayaalp and Abe Jellinek",
+	"target": "^https://(ling\\.auf|lingbuzz)\\.net/lingbuzz/(repo/semanticsArchive/article/)?(\\d+|_search)",
+	"minVersion": "3.0",
+	"maxVersion": "",
+	"priority": 100,
+	"inRepository": true,
+	"translatorType": 4,
+	"browserSupport": "gcsibv",
+	"lastUpdated": "2022-05-04 01:00:37"
+}
+
+/*
+	***** BEGIN LICENSE BLOCK *****
+
+	Copyright © 2021 Göktuğ Kayaalp <self at gkayaalp dot com> and Abe Jellinek
+
+	This file is part of Zotero.
+
+	Zotero is free software: you can redistribute it and/or modify
+	it under the terms of the GNU Affero General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	Zotero is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+	GNU Affero General Public License for more details.
+
+	You should have received a copy of the GNU Affero General Public License
+	along with Zotero. If not, see <http://www.gnu.org/licenses/>.
+
+	***** END LICENSE BLOCK *****
+*/
+
+const preprintType = ZU.fieldIsValidForType('title', 'preprint')
+	? 'preprint'
+	: 'report';
+
+function detectWeb(doc, url) {
+	if (url.includes("/_search") && getSearchResults(doc, true)) {
+		return "multiple";
+	}
+	return preprintType;
+}
+
+function getSearchResults(doc, checkOnly) {
+	var items = {};
+	var found = false;
+	// exclude author links
+	var rows = doc.querySelectorAll('td a:not([href*="?_s="])');
+	for (let row of rows) {
+		let href = row.href;
+		let title = ZU.trimInternal(
+			row.textContent.replace(/\s+\[semanticsArchive\]$/, "")
+		);
+		if (!href || !title) continue;
+		if (checkOnly) return true;
+		found = true;
+		items[href] = title;
+	}
+	return found ? items : false;
+}
+
+function doWeb(doc, url) {
+	if (detectWeb(doc, url) == "multiple") {
+		Zotero.selectItems(getSearchResults(doc, false), function (items) {
+			if (items) ZU.processDocuments(Object.keys(items), scrape);
+		});
+	}
+	else {
+		scrape(doc, url);
+	}
+}
+
+function scrape(doc, url) {
+	if (url.match(/semanticsArchive/)) {
+		scrapeSA(doc, url);
+		return;
+	}
+	
+	var newItem = new Zotero.Item(preprintType);
+	if (preprintType == "report") {
+		newItem.extra = "type: article\n";
+	}
+
+	// Collect information.
+	var idBlock = doc.querySelector("center");
+	var title = text(idBlock, "a[href*='.pdf']");
+	var authors = idBlock.querySelectorAll("a[href*='_k=']");
+	// These are unpleasant but they're the best we have.
+	var date = idBlock.lastChild.textContent;
+	var abstract = idBlock.nextElementSibling.nextSibling.textContent;
+
+	var tableRows = doc.querySelectorAll("tbody tr");
+	for (let row of tableRows) {
+		let [left, right] = row.querySelectorAll("td");
+		if (!left || !right) continue;
+		let fieldName = left.innerText.toLowerCase();
+		if (fieldName.includes("format")) {
+			let pdfUrl = right.querySelector("a[href*='.pdf']").href;
+			newItem.attachments.push({ url: pdfUrl, title: "LingBuzz Full Text PDF", mimeType: "application/pdf" });
+		}
+		else if (fieldName.includes("keywords")) {
+			newItem.tags.push(...right.innerText.split(/[;,] /));
+		}
+		else if (fieldName.includes("published in")) {
+			newItem.extra = (newItem.extra || '') + 'LingBuzz Published In: ' + right.innerText + '\n';
+		}
+	}
+
+	newItem.title = title;
+	for (let authorLink of authors) {
+		newItem.creators.push(
+			Zotero.Utilities.cleanAuthor(authorLink.innerText, "author"));
+	}
+	newItem.abstractNote = abstract;
+	newItem.date = ZU.strToISO(date);
+	newItem.url = url;
+	newItem.attachments.push({ document: doc, title: "Snapshot" });
+	newItem.publisher = "LingBuzz";
+
+	newItem.complete();
+}
+
+function scrapeSA(doc, url) {
+	var newItem = new Zotero.Item(preprintType);
+	if (preprintType == "report") {
+		newItem.extra = "type: article\n";
+	}
+
+	// Collect information.
+	var idBlock = doc.querySelector("center");
+	// This is even worse than the usual LingBuzz pages.
+	var title = text(idBlock, "a:first-child");
+	var authors = idBlock.querySelectorAll("a:not(:first-child)");
+	// These are unpleasant but they're the best we have.
+	var date = idBlock.lastChild.textContent;
+
+	let pdfUrl = idBlock.querySelector("a:first-child").href;
+	newItem.attachments.push({ url: pdfUrl,
+							   title: "LingBuzz (SemanticsArchive) Full Text PDF",
+							   mimeType: "application/pdf" });
+
+	var tableRows = doc.querySelectorAll("tbody tr");
+	for (let row of tableRows) {
+		let [left, right] = row.querySelectorAll("td");
+		if (!left || !right) continue;
+		let fieldName = left.innerText.toLowerCase();
+		if (fieldName.includes("keywords")) {
+			newItem.tags.push(...right.innerText.split(/[;,] /));
+		}
+	}
+
+	newItem.title = title;
+	for (let authorLink of authors) {
+		newItem.creators.push(
+			Zotero.Utilities.cleanAuthor(authorLink.innerText, "author"));
+	}
+	newItem.date = ZU.strToISO(date);
+	newItem.url = url;
+	newItem.attachments.push({ document: doc, title: "Snapshot" });
+	newItem.publisher = "LingBuzz (SemanticsArchive)";
+
+	newItem.complete();
+}
+/** BEGIN TEST CASES **/
+var testCases = [
+	{
+		"type": "web",
+		"url": "https://ling.auf.net/lingbuzz/005988",
+		"items": [
+			{
+				"itemType": "preprint",
+				"title": "Verb height indeed determines prosodic phrasing: evidence from Iron Ossetic",
+				"creators": [
+					{
+						"firstName": "Lena",
+						"lastName": "Borise",
+						"creatorType": "author"
+					},
+					{
+						"firstName": "David",
+						"lastName": "Erschler",
+						"creatorType": "author"
+					}
+				],
+				"date": "2021-05",
+				"abstractNote": "We provide novel evidence in favor of the proposal by Hamlaoui and Szendrői (2015, 2017), who argue for a flexible mapping between an Intonational Phrase (ɩ) and syntactic constituents. According to them, ɩ corresponds to the highest projection that hosts verbal material, together with its specifier. The prediction is that the size of ɩ co-varies with the height of the verb, if the latter is variable. Our evidence comes from Iron Ossetic (East Iranian), a language with multiple projections available for verb raising, depending on context. The flexible ɩ-mapping approach – but not more rigid approaches to ɩ-formation – can account for the properties of ɩ-formation in Iron Ossetic. This applies to the prosody of utterances that contain negative indefinites, narrow foci, and single wh-phrases. More complex wh-questions (those with multiple wh-phrases and/or negative indefinites) provide evidence that syntax-based flexible ɩ-mapping approach interacts with language-specific eurhythmic constraints. The Iron Ossetic facts, therefore, provide support for the flexible ɩ-mapping approach, which has not been tested until now on languages of this type.",
+				"extra": "LingBuzz Published In: Proceedings of NELS 51",
+				"libraryCatalog": "LingBuzz",
+				"repository": "LingBuzz",
+				"shortTitle": "Verb height indeed determines prosodic phrasing",
+				"url": "https://ling.auf.net/lingbuzz/005988",
+				"attachments": [
+					{
+						"title": "LingBuzz Full Text PDF",
+						"mimeType": "application/pdf"
+					},
+					{
+						"title": "Snapshot",
+						"mimeType": "text/html"
+					}
+				],
+				"tags": [
+					{
+						"tag": "focus"
+					},
+					{
+						"tag": "iranian"
+					},
+					{
+						"tag": "iron ossetic"
+					},
+					{
+						"tag": "phonology"
+					},
+					{
+						"tag": "prosodic phrasing"
+					},
+					{
+						"tag": "syntax"
+					},
+					{
+						"tag": "syntax-prosody interface"
+					},
+					{
+						"tag": "wh-questions"
+					}
+				],
+				"notes": [],
+				"seeAlso": []
+			}
+		]
+	},
+	{
+		"type": "web",
+		"url": "https://ling.auf.net/lingbuzz/repo/semanticsArchive/article/001471",
+		"items": [
+			{
+				"itemType": "preprint",
+				"title": "Review of Barker and Shan (2015) Continuations and Natural Language",
+				"creators": [
+					{
+						"firstName": "Yusuke",
+						"lastName": "Kubota",
+						"creatorType": "author"
+					}
+				],
+				"date": "2015-06",
+				"libraryCatalog": "LingBuzz",
+				"repository": "LingBuzz (SemanticsArchive)",
+				"url": "https://ling.auf.net/lingbuzz/repo/semanticsArchive/article/001471",
+				"attachments": [
+					{
+						"title": "LingBuzz (SemanticsArchive) Full Text PDF",
+						"mimeType": "application/pdf"
+					},
+					{
+						"title": "Snapshot",
+						"mimeType": "text/html"
+					}
+				],
+				"tags": [
+					{
+						"tag": "binding"
+					},
+					{
+						"tag": "categorial grammar"
+					},
+					{
+						"tag": "continuations"
+					},
+					{
+						"tag": "crossover"
+					},
+					{
+						"tag": "reconstruction"
+					},
+					{
+						"tag": "scope"
+					},
+					{
+						"tag": "semantics"
+					},
+					{
+						"tag": "semanticsarchive"
+					},
+					{
+						"tag": "syntax"
+					}
+				],
+				"notes": [],
+				"seeAlso": []
+			}
+		]
+	},
+	{
+		"type": "web",
+		"url": "https://ling.auf.net/lingbuzz/_search?q=svan",
+		"items": "multiple"
+	},
+	{
+		"type": "web",
+		"url": "https://ling.auf.net/lingbuzz/_search?q=construction+grammar",
+		"items": "multiple"
+	},
+	{
+		"type": "web",
+		"url": "https://ling.auf.net/lingbuzz/_search?q=semanticsarchive",
+		"items": "multiple"
+	},
+	{
+		"type": "web",
+		"url": "https://lingbuzz.net/lingbuzz/006559",
+		"items": [
+			{
+				"itemType": "preprint",
+				"title": "Object drop in Spanish is not island-sensitive",
+				"creators": [
+					{
+						"firstName": "Matías",
+						"lastName": "Verdecchia",
+						"creatorType": "author"
+					}
+				],
+				"date": "2022-04",
+				"abstractNote": "Campos (1986) argues that object drop in Spanish exhibits island effects. This claim has remained unchallenged up to date and is largely assumed in the literature. In this squib, I show that this characterization is not empirically correct: given a proper discourse context, null objects can easily appear within a syntactic island in Spanish. This observation constitutes a non-trivial problem for object drop analyses based on movement.",
+				"extra": "LingBuzz Published In: To appear in Journal of Linguistics",
+				"libraryCatalog": "LingBuzz",
+				"repository": "LingBuzz",
+				"url": "https://lingbuzz.net/lingbuzz/006559",
+				"attachments": [
+					{
+						"title": "LingBuzz Full Text PDF",
+						"mimeType": "application/pdf"
+					},
+					{
+						"title": "Snapshot",
+						"mimeType": "text/html"
+					}
+				],
+				"tags": [
+					{
+						"tag": "object drop - islands - spanish - movement"
+					},
+					{
+						"tag": "syntax"
+					}
+				],
+				"notes": [],
+				"seeAlso": []
+			}
+		]
+	}
+]
+/** END TEST CASES **/