-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 5db386b
Showing
10 changed files
with
165 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Some stuff | ||
# OCI-IFO |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{ | ||
"_id": ObjectId("56b782be1d41c83ce0c7010b"), | ||
"author": "Jon Azpiri", | ||
"title": "2-year-old girl in Langley dies in ‘tragic accident’", | ||
"full_article": "<p>A two-year-old girl who was found in a Langley pond has died.</p><p>Neighbours, RCMP and search and rescue crews scoured an area around Robertson Crescent early Saturday evening for signs of the girl after she was reported missing. Shortly after 7 p.m., they discovered the girl in a pond they had drained.</p><p>She was rushed to hospital in grave condition. RCMP said she has since passed away.</p><p>Police described the incident as a “tragic accident” and said there were no signs of foul play.</p><p>No names have been released.</p><p class=\"story-ad align-middle\"></p>", | ||
"in_drupal": false, | ||
"language": "en", | ||
"media_url": "http://globalnews.ca/", | ||
"original_article_url": "http://globalnews.ca/news/2502837/2-year-old-girl-in-langley-dies-in-tragic-accident/", | ||
"published_date": "Sun, 07 Feb 2016 17:35:42 +0000", | ||
"redirected_article_url": "http://globalnews.ca/news/2502837/2-year-old-girl-in-langley-dies-in-tragic-accident/", | ||
"rss_url": "http://globalnews.ca/bc/feed/", | ||
"scraped_on": NumberInt(1454867134) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
{ | ||
"_id": ObjectId("56993b2d1d41c8155edcc182"), | ||
"original_website_url": "http://www.nationalpost.com", | ||
"organisation_type": "media", | ||
"wiki_url": "http://en.wikipedia.org/wiki/National_Post", | ||
"dbpedia_url": "http://dbpedia.org/resource/National_Post", | ||
"organisation": "National Post", | ||
"avgcirculation": "92212.666666666666667", | ||
"redirected_website_url": "http://www.nationalpost.com/index.html", | ||
"scraped_on": NumberInt(1452882733), | ||
"dbpedia_owner": "http://dbpedia.org/resource/Postmedia_Network", | ||
"owner": "Postmedia Network Canada Corporation" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
{ | ||
"_id": ObjectId("56aa81491d41c866f058a0dc"), | ||
"rss_url": "http://www.chroniclejournal.com/search/?c%5B%5D=news%2Flocal%2Cnews%2Flocal%2F%2A&d=&d1=&d2=&f=rss&l=100&nsa=eedition&q=&s=start_time&sd=desc&t=article", | ||
"language": "en", | ||
"valid_rss": NumberInt(1), | ||
"scraped_on": NumberInt(1454014793), | ||
"media_url": "http://www.chroniclejournal.com/" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
<?php | ||
|
||
try {
    // Connection to MongoDB on the IFO server.
    // MongoClient is used instead of the deprecated Mongo class of the
    // legacy PHP driver; both accept the same connection string.
    $conn = new MongoClient('mongodb://hackathon_reader:To0305@www.oci-ifo.org:27017/scrapy?authMechanism=MONGODB-CR');
    $db = $conn->scrapy;
    $collection = $db->articles;

    // Query to MongoDB.
    // This query looks for a particular keyword in articles. "TransCanada"
    // may be replaced by any keyword.
    $keyword = 'TransCanada';

    // The pattern is built from single-quoted pieces on purpose: inside a
    // double-quoted string PHP expands "$TransCanada" as an (undefined)
    // variable, which leaves the pattern "/^/i" and matches every article.
    // There is no "^" anchor, so the keyword is found anywhere in the text,
    // and preg_quote() keeps regex metacharacters in the keyword literal.
    $query = array(
        'full_article' => new MongoRegex('/' . preg_quote($keyword, '/') . '/i')
    );

    // Collect at most 1000 matches. Each match is emitted as a one-entry
    // array keyed by the cursor key (presumably the document's _id as a
    // string in the legacy driver -- confirm), wrapping the document itself.
    $json = array();
    $cursor = $collection->find($query)->limit(1000);
    foreach ($cursor as $id => $document) {
        $json[] = array($id => $document);
    }
    echo json_encode($json);

    // Disconnect from server.
    $conn->close();
} catch (MongoConnectionException $e) {
    die('Error connecting to MongoDB server');
} catch (MongoException $e) {
    die('Error: ' . $e->getMessage());
}
?> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from pymongo import MongoClient | ||
|
||
# Connection to MongoDB on the IFO server.
# NOTE(review): the account credentials are embedded in the URI -- confirm
# they are meant to be public before sharing this script further.
# NOTE(review): MONGODB-CR authentication is not supported by recent
# PyMongo/MongoDB releases -- confirm the driver and server versions.
uri = "mongodb://hackathon_reader:To0305@www.oci-ifo.org:27017/scrapy?authMechanism=MONGODB-CR"
client = MongoClient(uri)
db = client.scrapy
collection = db.articles

# Uncomment to save output to a file.
# f = open('data.txt', 'w')

# Query to MongoDB.
# This query outputs the articles written by one author (at most 1000).
articles = collection.find({"author": "Ryan White"}).limit(1000)
for x in articles:
    # print(x) with a single argument behaves the same on Python 2 and 3;
    # the former `print x` statement is a SyntaxError on Python 3.
    print(x)
    # Uncomment to save output to a file.
    # f.write(str(x) + '\n')
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from pymongo import MongoClient | ||
|
||
# Connection to MongoDB on the IFO server.
# NOTE(review): the account credentials are embedded in the URI -- confirm
# they are meant to be public before sharing this script further.
# NOTE(review): MONGODB-CR authentication is not supported by recent
# PyMongo/MongoDB releases -- confirm the driver and server versions.
uri = "mongodb://hackathon_reader:To0305@www.oci-ifo.org:27017/scrapy?authMechanism=MONGODB-CR"
client = MongoClient(uri)
db = client.scrapy
collection = db.articles

# Uncomment to save output to a file.
# f = open('data.txt', 'w')

# Query to MongoDB.
# This query looks for a particular keyword in articles (at most 1000
# results). TransCanada may be replaced by anything; the value is passed to
# MongoDB as a regular expression.
keyword = 'TransCanada'
articles = collection.find({"full_article": {'$regex': keyword}}).limit(1000)
for x in articles:
    # print(x) with a single argument behaves the same on Python 2 and 3;
    # the former `print x` statement is a SyntaxError on Python 3.
    print(x)
    # Uncomment to save output to a file.
    # f.write(str(x) + '\n')
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from pymongo import MongoClient | ||
|
||
# Connection to MongoDB on the IFO server.
# NOTE(review): the account credentials are embedded in the URI -- confirm
# they are meant to be public before sharing this script further.
# NOTE(review): MONGODB-CR authentication is not supported by recent
# PyMongo/MongoDB releases -- confirm the driver and server versions.
uri = "mongodb://hackathon_reader:To0305@www.oci-ifo.org:27017/scrapy?authMechanism=MONGODB-CR"
client = MongoClient(uri)
db = client.scrapy
collection = db.articles

# Uncomment to save output to a file.
# f = open('data.txt', 'w')

# Query to MongoDB.
# This query outputs the articles published by one media outlet, selected by
# its media_url (at most 1000 results).
articles = collection.find({"media_url": "http://www.thestar.com/"}).limit(1000)
for x in articles:
    # print(x) with a single argument behaves the same on Python 2 and 3;
    # the former `print x` statement is a SyntaxError on Python 3.
    print(x)
    # Uncomment to save output to a file.
    # f.write(str(x) + '\n')
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from pymongo import MongoClient | ||
|
||
# Connection to MongoDB on the IFO server.
# NOTE(review): the account credentials are embedded in the URI -- confirm
# they are meant to be public before sharing this script further.
# NOTE(review): MONGODB-CR authentication is not supported by recent
# PyMongo/MongoDB releases -- confirm the driver and server versions.
uri = "mongodb://hackathon_reader:To0305@www.oci-ifo.org:27017/scrapy?authMechanism=MONGODB-CR"
client = MongoClient(uri)
db = client.scrapy
collection = db.websites

# Uncomment to save output to a file.
# f = open('data.txt', 'w')

# Query to MongoDB.
# This query outputs a list of the media in the database: every document of
# the websites collection that has an original_website_url field (at most
# 1000 results).
media = collection.find({"original_website_url": {"$exists": 1}}).limit(1000)
for x in media:
    # print(x) with a single argument behaves the same on Python 2 and 3;
    # the former `print x` statement is a SyntaxError on Python 3.
    print(x)
    # Uncomment to save output to a file.
    # f.write(str(x) + '\n')
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from pymongo import MongoClient | ||
|
||
# Connection to MongoDB on the IFO server.
# NOTE(review): the account credentials are embedded in the URI -- confirm
# they are meant to be public before sharing this script further.
# NOTE(review): MONGODB-CR authentication is not supported by recent
# PyMongo/MongoDB releases -- confirm the driver and server versions.
uri = "mongodb://hackathon_reader:To0305@www.oci-ifo.org:27017/scrapy?authMechanism=MONGODB-CR"
client = MongoClient(uri)
db = client.scrapy
collection = db.websites

# Uncomment to save output to a file.
# f = open('data.txt', 'w')

# Query to MongoDB.
# This query outputs a list of newspapers and their circulation: documents of
# the websites collection whose avgcirculation is not None (at most 1000
# results).
media = collection.find({"avgcirculation": {"$ne": None}}).limit(1000)
for x in media:
    # print(x) with a single argument behaves the same on Python 2 and 3;
    # the former `print x` statement is a SyntaxError on Python 3.
    print(x)
    # Uncomment to save output to a file.
    # f.write(str(x) + '\n')
|