ResourceType "MediaWikiPage"
C1544346692
Jump to navigation
Jump to search
|
Feeding
Source
https://github.com/dataspects/DataspectsMediaWikiFeeder
Entities (Namespace 0, 10, 106, 102)
Destination
$wgDataspectsApiURL/mediawikis/$wgMediaWikiMongoID/pages
NativeMongoDoc
$mongoDoc = array(
"slug" => "pending",
"resourceSiloType" => "pending",
"resourceSiloLabel" => "pending",
"resourceSiloID" => "pending",
"resourceType" => "MediaWikiPage",
"pagename" => $this->title->mTextform,
// Do we want the index.php?title= form here?
"rawUrl" => $this->title->getInternalURL(),
"shortUrl" => $this->title->getFullURL(),
"namespace" => $this->getNamespace($this->title->mNamespace),
"full" => array(
"wikitext" => $this->wikitext,
"text" => "NOT USED BECAUSE NO TIKA HERE",
"html" => $this->parsedWikitext
),
"categories" => $this->categories,
"annotations" => $this->annotations,
"feederClass" =>"DataspectsMediaWikiFeeder"
);
$this->categories[] = $category->mTextform;
# Only Namespace 0
$this->annotations[] = array(
'subject' => $this->title->mTextform,
'predicate' => $propertyName,
'object' => array(
'source' => str_replace('#0##', '', $object['item']),
'html' => '',
'text' => ''
)
);
Predicates (Namespace 102)
Destination
$wgDataspectsApiURL/predicates
NativeMongoDoc
$predicateMongodoc = array(
"predicate" => $this->title->mTextform,
"predicateType" => $this->annotations['_TYPE']['object'],
"predicateClass" => $this->annotations['HasPredicateClass']['object'],
"predicateNamespace" => $this->getNamespace($this->title->mNamespace),
"predicateCategories" => $this->categories
);
$this->categories[] = $category->mTextform;
Indexing
Entities
Destination
process.env.ES_NODE · routing: document.id · index: process.env.DEFAULT_ES_INDEX
NativeESDoc
let entityDoc = {
MyType: 'entity',
// Resource silo level
OriginatedFromResourceSiloID: document.resourceSiloID,
OriginatedFromResourceSiloLabel: document.resourceSiloLabel,
OriginatedFromResourceSiloType: document.resourceSiloType,
// Resource level
OriginatedFromResourceName: document.pagename,
OriginatedFromResourceURL: document.rawUrl,
OriginatedFromResourceType: document.resourceType,
// Entity/subject level
HasEntityClass: "",
HasEntityName: document.pagename,
HasEntityType: theHasEntityType,
HasEntityURL: document.rawUrl,
HasEntityTitle: hasEntityTitle,
HasEntityBlurbTEXT: hasEntityBlurb,
HasEntityBlurbHTML: "",
HasEntityContentSOURCE: document.full.wikitext,
HasEntityContentTEXT: entityContentText,
HasEntityContentHTML: html
}
theHasEntityType
const termOntEntityTypes = ["Template", "Form"];
var theHasEntityType = hasEntityType;
if (termOntEntityTypes.includes(document.namespace)) theHasEntityType = document.namespace;
DirectESField HasEntityTitle
is populated by the corresponding mediawikipage's NativeResourceAnnotation "HasEntityTitle".
var hasEntityTitle = "";
if (theAnnotation.predicate == "HasEntityTitle") hasEntityTitle = theAnnotation.object.source;
DirectESField HasEntityBlurbTEXT
is populated by the corresponding mediawikipage's NativeResourceAnnotation "HasEntityBlurb".
var hasEntityBlurb = "";
if (theAnnotation.predicate == "HasEntityBlurb") hasEntityBlurb = theAnnotation.object.source;
DirectESField HasEntityContentTEXT
is populated by entityContentText
from
$ = cheerio.load(document.full.html);
$('#dsMETADATA').remove();
html = $("body").html();
text = response.data[0]['X-TIKA:content']; // from html
entityContentText = text.replace(/\r?\n|\r/g, ' ').replace(/\s\s+/g, ' ');
DirectESField HasEntityContentHTML
is populated by html
from
$ = cheerio.load(document.full.html);
$('#dsMETADATA').remove();
html = $("body").html();
Annotations
Destination
process.env.ES_NODE · routing: theAnnotation.mediaWikiPage.toString() · index: process.env.DEFAULT_ES_INDEX
NativeResourceAnnotations
A MediaWikiPage's NativeResourceAnnotations are looked up by Annotation.find({ mediaWikiPage: job.data.pageId }).
NativeESDoc
let annotationDoc = {
MyType: {
name: 'annotation',
parent: theAnnotation.mediaWikiPage
},
subject: theAnnotation.subject,
predicate: theAnnotation.predicate,
objectSOURCE: theAnnotation.object.source,
objectHTML: theAnnotation.object.html,
objectTEXT: theAnnotation.object.text
};