Initial Commit
This commit is contained in:
359
translators/LingBuzz.js
Normal file
359
translators/LingBuzz.js
Normal file
@@ -0,0 +1,359 @@
|
||||
{
|
||||
"translatorID": "e048e70e-8fea-43e9-ac8e-940bc3d71b0b",
|
||||
"label": "LingBuzz",
|
||||
"creator": "Göktuğ Kayaalp and Abe Jellinek",
|
||||
"target": "^https://(ling\\.auf|lingbuzz)\\.net/lingbuzz/(repo/semanticsArchive/article/)?(\\d+|_search)",
|
||||
"minVersion": "3.0",
|
||||
"maxVersion": "",
|
||||
"priority": 100,
|
||||
"inRepository": true,
|
||||
"translatorType": 4,
|
||||
"browserSupport": "gcsibv",
|
||||
"lastUpdated": "2022-05-04 01:00:37"
|
||||
}
|
||||
|
||||
/*
|
||||
***** BEGIN LICENSE BLOCK *****
|
||||
|
||||
Copyright © 2021 Göktuğ Kayaalp <self at gkayaalp dot com> and Abe Jellinek
|
||||
|
||||
This file is part of Zotero.
|
||||
|
||||
Zotero is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Zotero is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
***** END LICENSE BLOCK *****
|
||||
*/
|
||||
|
||||
const preprintType = ZU.fieldIsValidForType('title', 'preprint')
|
||||
? 'preprint'
|
||||
: 'report';
|
||||
|
||||
function detectWeb(doc, url) {
|
||||
if (url.includes("/_search") && getSearchResults(doc, true)) {
|
||||
return "multiple";
|
||||
}
|
||||
return preprintType;
|
||||
}
|
||||
|
||||
function getSearchResults(doc, checkOnly) {
|
||||
var items = {};
|
||||
var found = false;
|
||||
// exclude author links
|
||||
var rows = doc.querySelectorAll('td a:not([href*="?_s="])');
|
||||
for (let row of rows) {
|
||||
let href = row.href;
|
||||
let title = ZU.trimInternal(
|
||||
row.textContent.replace(/\s+\[semanticsArchive\]$/, "")
|
||||
);
|
||||
if (!href || !title) continue;
|
||||
if (checkOnly) return true;
|
||||
found = true;
|
||||
items[href] = title;
|
||||
}
|
||||
return found ? items : false;
|
||||
}
|
||||
|
||||
function doWeb(doc, url) {
|
||||
if (detectWeb(doc, url) == "multiple") {
|
||||
Zotero.selectItems(getSearchResults(doc, false), function (items) {
|
||||
if (items) ZU.processDocuments(Object.keys(items), scrape);
|
||||
});
|
||||
}
|
||||
else {
|
||||
scrape(doc, url);
|
||||
}
|
||||
}
|
||||
|
||||
function scrape(doc, url) {
|
||||
if (url.match(/semanticsArchive/)) {
|
||||
scrapeSA(doc, url);
|
||||
return;
|
||||
}
|
||||
|
||||
var newItem = new Zotero.Item(preprintType);
|
||||
if (preprintType == "report") {
|
||||
newItem.extra = "type: article\n";
|
||||
}
|
||||
|
||||
// Collect information.
|
||||
var idBlock = doc.querySelector("center");
|
||||
var title = text(idBlock, "a[href*='.pdf']");
|
||||
var authors = idBlock.querySelectorAll("a[href*='_k=']");
|
||||
// These are unpleasant but they're the best we have.
|
||||
var date = idBlock.lastChild.textContent;
|
||||
var abstract = idBlock.nextElementSibling.nextSibling.textContent;
|
||||
|
||||
var tableRows = doc.querySelectorAll("tbody tr");
|
||||
for (let row of tableRows) {
|
||||
let [left, right] = row.querySelectorAll("td");
|
||||
if (!left || !right) continue;
|
||||
let fieldName = left.innerText.toLowerCase();
|
||||
if (fieldName.includes("format")) {
|
||||
let pdfUrl = right.querySelector("a[href*='.pdf']").href;
|
||||
newItem.attachments.push({ url: pdfUrl, title: "LingBuzz Full Text PDF", mimeType: "application/pdf" });
|
||||
}
|
||||
else if (fieldName.includes("keywords")) {
|
||||
newItem.tags.push(...right.innerText.split(/[;,] /));
|
||||
}
|
||||
else if (fieldName.includes("published in")) {
|
||||
newItem.extra = (newItem.extra || '') + 'LingBuzz Published In: ' + right.innerText + '\n';
|
||||
}
|
||||
}
|
||||
|
||||
newItem.title = title;
|
||||
for (let authorLink of authors) {
|
||||
newItem.creators.push(
|
||||
Zotero.Utilities.cleanAuthor(authorLink.innerText, "author"));
|
||||
}
|
||||
newItem.abstractNote = abstract;
|
||||
newItem.date = ZU.strToISO(date);
|
||||
newItem.url = url;
|
||||
newItem.attachments.push({ document: doc, title: "Snapshot" });
|
||||
newItem.publisher = "LingBuzz";
|
||||
|
||||
newItem.complete();
|
||||
}
|
||||
|
||||
function scrapeSA(doc, url) {
|
||||
var newItem = new Zotero.Item(preprintType);
|
||||
if (preprintType == "report") {
|
||||
newItem.extra = "type: article\n";
|
||||
}
|
||||
|
||||
// Collect information.
|
||||
var idBlock = doc.querySelector("center");
|
||||
// This is even worse than the usual LingBuzz pages.
|
||||
var title = text(idBlock, "a:first-child");
|
||||
var authors = idBlock.querySelectorAll("a:not(:first-child)");
|
||||
// These are unpleasant but they're the best we have.
|
||||
var date = idBlock.lastChild.textContent;
|
||||
|
||||
let pdfUrl = idBlock.querySelector("a:first-child").href;
|
||||
newItem.attachments.push({ url: pdfUrl,
|
||||
title: "LingBuzz (SemanticsArchive) Full Text PDF",
|
||||
mimeType: "application/pdf" });
|
||||
|
||||
var tableRows = doc.querySelectorAll("tbody tr");
|
||||
for (let row of tableRows) {
|
||||
let [left, right] = row.querySelectorAll("td");
|
||||
if (!left || !right) continue;
|
||||
let fieldName = left.innerText.toLowerCase();
|
||||
if (fieldName.includes("keywords")) {
|
||||
newItem.tags.push(...right.innerText.split(/[;,] /));
|
||||
}
|
||||
}
|
||||
|
||||
newItem.title = title;
|
||||
for (let authorLink of authors) {
|
||||
newItem.creators.push(
|
||||
Zotero.Utilities.cleanAuthor(authorLink.innerText, "author"));
|
||||
}
|
||||
newItem.date = ZU.strToISO(date);
|
||||
newItem.url = url;
|
||||
newItem.attachments.push({ document: doc, title: "Snapshot" });
|
||||
newItem.publisher = "LingBuzz (SemanticsArchive)";
|
||||
|
||||
newItem.complete();
|
||||
}
|
||||
/** BEGIN TEST CASES **/
|
||||
var testCases = [
|
||||
{
|
||||
"type": "web",
|
||||
"url": "https://ling.auf.net/lingbuzz/005988",
|
||||
"items": [
|
||||
{
|
||||
"itemType": "preprint",
|
||||
"title": "Verb height indeed determines prosodic phrasing: evidence from Iron Ossetic",
|
||||
"creators": [
|
||||
{
|
||||
"firstName": "Lena",
|
||||
"lastName": "Borise",
|
||||
"creatorType": "author"
|
||||
},
|
||||
{
|
||||
"firstName": "David",
|
||||
"lastName": "Erschler",
|
||||
"creatorType": "author"
|
||||
}
|
||||
],
|
||||
"date": "2021-05",
|
||||
"abstractNote": "We provide novel evidence in favor of the proposal by Hamlaoui and Szendrői (2015, 2017), who argue for a flexible mapping between an Intonational Phrase (ɩ) and syntactic constituents. According to them, ɩ corresponds to the highest projection that hosts verbal material, together with its specifier. The prediction is that the size of ɩ co-varies with the height of the verb, if the latter is variable. Our evidence comes from Iron Ossetic (East Iranian), a language with multiple projections available for verb raising, depending on context. The flexible ɩ-mapping approach – but not more rigid approaches to ɩ-formation – can account for the properties of ɩ-formation in Iron Ossetic. This applies to the prosody of utterances that contain negative indefinites, narrow foci, and single wh-phrases. More complex wh-questions (those with multiple wh-phrases and/or negative indefinites) provide evidence that syntax-based flexible ɩ-mapping approach interacts with language-specific eurhythmic constraints. The Iron Ossetic facts, therefore, provide support for the flexible ɩ-mapping approach, which has not been tested until now on languages of this type.",
|
||||
"extra": "LingBuzz Published In: Proceedings of NELS 51",
|
||||
"libraryCatalog": "LingBuzz",
|
||||
"repository": "LingBuzz",
|
||||
"shortTitle": "Verb height indeed determines prosodic phrasing",
|
||||
"url": "https://ling.auf.net/lingbuzz/005988",
|
||||
"attachments": [
|
||||
{
|
||||
"title": "LingBuzz Full Text PDF",
|
||||
"mimeType": "application/pdf"
|
||||
},
|
||||
{
|
||||
"title": "Snapshot",
|
||||
"mimeType": "text/html"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"tag": "focus"
|
||||
},
|
||||
{
|
||||
"tag": "iranian"
|
||||
},
|
||||
{
|
||||
"tag": "iron ossetic"
|
||||
},
|
||||
{
|
||||
"tag": "phonology"
|
||||
},
|
||||
{
|
||||
"tag": "prosodic phrasing"
|
||||
},
|
||||
{
|
||||
"tag": "syntax"
|
||||
},
|
||||
{
|
||||
"tag": "syntax-prosody interface"
|
||||
},
|
||||
{
|
||||
"tag": "wh-questions"
|
||||
}
|
||||
],
|
||||
"notes": [],
|
||||
"seeAlso": []
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "web",
|
||||
"url": "https://ling.auf.net/lingbuzz/repo/semanticsArchive/article/001471",
|
||||
"items": [
|
||||
{
|
||||
"itemType": "preprint",
|
||||
"title": "Review of Barker and Shan (2015) Continuations and Natural Language",
|
||||
"creators": [
|
||||
{
|
||||
"firstName": "Yusuke",
|
||||
"lastName": "Kubota",
|
||||
"creatorType": "author"
|
||||
}
|
||||
],
|
||||
"date": "2015-06",
|
||||
"libraryCatalog": "LingBuzz",
|
||||
"repository": "LingBuzz (SemanticsArchive)",
|
||||
"url": "https://ling.auf.net/lingbuzz/repo/semanticsArchive/article/001471",
|
||||
"attachments": [
|
||||
{
|
||||
"title": "LingBuzz (SemanticsArchive) Full Text PDF",
|
||||
"mimeType": "application/pdf"
|
||||
},
|
||||
{
|
||||
"title": "Snapshot",
|
||||
"mimeType": "text/html"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"tag": "binding"
|
||||
},
|
||||
{
|
||||
"tag": "categorial grammar"
|
||||
},
|
||||
{
|
||||
"tag": "continuations"
|
||||
},
|
||||
{
|
||||
"tag": "crossover"
|
||||
},
|
||||
{
|
||||
"tag": "reconstruction"
|
||||
},
|
||||
{
|
||||
"tag": "scope"
|
||||
},
|
||||
{
|
||||
"tag": "semantics"
|
||||
},
|
||||
{
|
||||
"tag": "semanticsarchive"
|
||||
},
|
||||
{
|
||||
"tag": "syntax"
|
||||
}
|
||||
],
|
||||
"notes": [],
|
||||
"seeAlso": []
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "web",
|
||||
"url": "https://ling.auf.net/lingbuzz/_search?q=svan",
|
||||
"items": "multiple"
|
||||
},
|
||||
{
|
||||
"type": "web",
|
||||
"url": "https://ling.auf.net/lingbuzz/_search?q=construction+grammar",
|
||||
"items": "multiple"
|
||||
},
|
||||
{
|
||||
"type": "web",
|
||||
"url": "https://ling.auf.net/lingbuzz/_search?q=semanticsarchive",
|
||||
"items": "multiple"
|
||||
},
|
||||
{
|
||||
"type": "web",
|
||||
"url": "https://lingbuzz.net/lingbuzz/006559",
|
||||
"items": [
|
||||
{
|
||||
"itemType": "preprint",
|
||||
"title": "Object drop in Spanish is not island-sensitive",
|
||||
"creators": [
|
||||
{
|
||||
"firstName": "Matías",
|
||||
"lastName": "Verdecchia",
|
||||
"creatorType": "author"
|
||||
}
|
||||
],
|
||||
"date": "2022-04",
|
||||
"abstractNote": "Campos (1986) argues that object drop in Spanish exhibits island effects. This claim has remained unchallenged up to date and is largely assumed in the literature. In this squib, I show that this characterization is not empirically correct: given a proper discourse context, null objects can easily appear within a syntactic island in Spanish. This observation constitutes a non-trivial problem for object drop analyses based on movement.",
|
||||
"extra": "LingBuzz Published In: To appear in Journal of Linguistics",
|
||||
"libraryCatalog": "LingBuzz",
|
||||
"repository": "LingBuzz",
|
||||
"url": "https://lingbuzz.net/lingbuzz/006559",
|
||||
"attachments": [
|
||||
{
|
||||
"title": "LingBuzz Full Text PDF",
|
||||
"mimeType": "application/pdf"
|
||||
},
|
||||
{
|
||||
"title": "Snapshot",
|
||||
"mimeType": "text/html"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"tag": "object drop - islands - spanish - movement"
|
||||
},
|
||||
{
|
||||
"tag": "syntax"
|
||||
}
|
||||
],
|
||||
"notes": [],
|
||||
"seeAlso": []
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
/** END TEST CASES **/
|
||||
Reference in New Issue
Block a user