Scrapy redirect 302

I am crawling a website, but it redirects to another page. In my spider I added
handle_httpstatus_list = [302, 301]
and overrode the start_requests method, but the problem is:
AttributeError: 'Response' object has no attribute 'xpath'
Spider code:
# -*- coding=utf-8 -*-
from __future__ import absolute_import
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule, Spider
from car.items import Car58Item
import scrapy
import time

class Car51Spider(CrawlSpider):
    name = 'car51'
    allowed_domains = ['51auto.com']
    start_urls = ['http://www.51auto.com/quanguo/pabmdcigf?searchtype=searcarlist&curentPage=1&isNewsCar=0&isSaleCar=0&isQa=0&orderValue=record_time']
    rules = [Rule(LinkExtractor(allow=('/pabmdcigf?searchtype=searcarlist&curentPage=\d+\&isNewsCar\=0\&isSaleCar\=0\&isQa\=0\&orderValue\=record_time')), callback='parse_item', follow=True)]  # page-crawling (pagination) rule
    handle_httpstatus_list = [302, 301]
    items = {}

    def start_requests(self):
        for url in self.start_urls:
            yield scrapy.Request(url, dont_filter=True, callback=self.parse_item)

    def parse_item(self, response):
        trs = response.xpath("//div[@class='view-grid-overflow']/a").extract()
        for tr in trs:
            sales_1 = u''
            item = Car58Item()
            urls = tr.xpath("a/@href").extract_first()
            item['url'] = tr.xpath("a/@href").extract_first()
            item['tip'] = tr.xpath("a/ul/li[@class='title']/text()").extract_first()
            item['name'] = tr.xpath("a/ul/li[@class='title']/text()").extract_first()
            sales_times = tr.xpath("a/ul/li[@class='info']/span/text()").extract()
            for x in sales_times:
                sales_1 = sales_1 + x
            item['sales_time'] = sales_1
            item['region'] = tr.xpath("a/ul/li[@class='info']/span[@class='font-color-red']/text()").extract_first()
            item['amt'] = tr.xpath("a/ul/li[@class='price']/div[1]/text()").extract_first()
            yield scrapy.Request(url=urls, callback=self.parse_netsted_item, meta={'item': item})

    def parse_netsted_item(self, response):
        dh = u''
        dha = u''
        mode = response.xpath("//body")
        item = Car58Item(response.meta['item'])
        dhs = mode.xpath("//div[@id='contact-tel1']/p/text()").extract()
        for x in dhs:
            dh = dh + x
        item['lianxiren_dh'] = dh
        lianxiren = mode.xpath("//div[@class='section-contact']/text()").extract()
        item['lianxiren'] = lianxiren[1]
        item['lianxiren_dz'] = lianxiren[2]
        item['details'] = mode.xpath("//div[@id='car-dangan']").extract()
        desc = mode.xpath("//div[@class='car-detail-container']/p/text()").extract()
        for d in desc:
            dha = dha + d
        item['description'] = dha
        item['image_urls'] = mode.xpath("//div[@class='car-pic']/img/@src").extract()
        item['collection_dt'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
        return item
settings.py:
# -*- coding: utf-8 -*-
# Scrapy settings for car project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# http://doc.scrapy.org/en/latest/topics/settings.html
# http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
# http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'car'
SPIDER_MODULES = ['car.spiders.car51']
#NEWSPIDER_MODULE = 'car.spiders.zhaoming'
DEFAULT_ITEM_CLASS = 'car.items.Car58Item'
ITEM_PIPELINES = {'scrapy.contrib.pipeline.images.ImagesPipeline': 1,
                  'car.pipelines.MongoDBPipeline': 300,
                  'car.pipelines.Car58ImagesPipeline': 301
                  }
MONGODB_SERVER ="localhost"
MONGODB_PORT=27017
MONGODB_DB="car"
MONGODB_COLLECTION_CAR="car"
MONGODB_COLLECTION_ZHAOMING="zhaoming"
IMAGES_STORE = "img/"
DOWNLOAD_DELAY = 0.25 # 250 ms of delay
IMAGES_EXPIRES = 90
DOWNLOAD_TIMEOUT=10
LOG_ENABLED=True
LOG_ENCODING='utf-8'
LOG_LEVEL="DEBUG"
LOGSTATS_INTERVAL=5
# LOG_FILE='/tmp/scrapy.log'
CONCURRENT_REQUESTS_PER_DOMAIN=16
#CONCURRENT_REQUESTS_PER_IP=16
scrapy log:
$scrapy crawl car51
2016-06-14 14:18:38 [scrapy] INFO: Scrapy 1.1.0 started (bot: car)
2016-06-14 14:18:38 [scrapy] INFO: Overridden settings: {'CONCURRENT_REQUESTS_PER_DOMAIN': 16, 'SPIDER_MODULES': ['car.spiders.car51'], 'BOT_NAME': 'car', 'DOWNLOAD_TIMEOUT': 10, 'LOGSTATS_INTERVAL': 5, 'USER_AGENT': 'Mozilla/5.0 (Windows NT 6.1; rv:35.0) Gecko/20100101 Firefox/35.0', 'DEFAULT_ITEM_CLASS': 'car.items.Car58Item', 'DOWNLOAD_DELAY': 0.25}
2016-06-14 14:18:38 [scrapy] INFO: Enabled extensions:
['scrapy.extensions.logstats.LogStats',
'scrapy.extensions.telnet.TelnetConsole',
'scrapy.extensions.corestats.CoreStats']
2016-06-14 14:18:38 [scrapy] INFO: Enabled downloader middlewares:
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
'scrapy.downloadermiddlewares.retry.RetryMiddleware',
'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
'scrapy.downloadermiddlewares.chunked.ChunkedTransferMiddleware',
'scrapy.downloadermiddlewares.stats.DownloaderStats']
2016-06-14 14:18:38 [scrapy] INFO: Enabled spider middlewares:
['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
'scrapy.spidermiddlewares.referer.RefererMiddleware',
'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
'scrapy.spidermiddlewares.depth.DepthMiddleware']
2016-06-14 14:18:38 [py.warnings] WARNING: /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/utils/deprecate.py:156: ScrapyDeprecationWarning: `scrapy.contrib.pipeline.images.ImagesPipeline` class is deprecated, use `scrapy.pipelines.images.ImagesPipeline` instead
ScrapyDeprecationWarning)
2016-06-14 14:18:38 [py.warnings] WARNING: /Users/mayuping/PycharmProjects/car/car/pipelines.py:13: ScrapyDeprecationWarning: Module `scrapy.log` has been deprecated, Scrapy now relies on the builtin Python library for logging. Read the updated logging entry in the documentation to learn more.
from scrapy import log
2016-06-14 14:18:38 [scrapy] INFO: Enabled item pipelines:
['scrapy.pipelines.images.ImagesPipeline',
'car.pipelines.MongoDBPipeline',
'car.pipelines.Car58ImagesPipeline']
2016-06-14 14:18:38 [scrapy] INFO: Spider opened
2016-06-14 14:18:38 [scrapy] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
2016-06-14 14:18:38 [scrapy] DEBUG: Telnet console listening on 127.0.0.1:6023
2016-06-14 14:18:38 [scrapy] DEBUG: Crawled (302) <GET http://www.51auto.com/quanguo/pabmdcigf?searchtype=searcarlist&curentPage=1&isNewsCar=0&isSaleCar=0&isQa=0&orderValue=record_time> (referer: None)
2016-06-14 14:18:39 [scrapy] ERROR: Spider error processing <GET http://www.51auto.com/quanguo/pabmdcigf?searchtype=searcarlist&curentPage=1&isNewsCar=0&isSaleCar=0&isQa=0&orderValue=record_time> (referer: None)
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/utils/defer.py", line 102, in iter_errback
yield next(it)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/spidermiddlewares/offsite.py", line 29, in process_spider_output
for x in result:
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/spidermiddlewares/referer.py", line 22, in <genexpr>
return (_set_referer(r) for r in result or ())
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/spidermiddlewares/urllength.py", line 37, in <genexpr>
return (r for r in result or () if _filter(r))
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/scrapy/spidermiddlewares/depth.py", line 58, in <genexpr>
return (r for r in result or () if _filter(r))
File "/Users/mayuping/PycharmProjects/car/car/spiders/car51.py", line 22, in parse_item
trs = response.xpath("//div[@class='view-grid-overflow']/a").extract()
AttributeError: 'Response' object has no attribute 'xpath'
2016-06-14 14:18:39 [scrapy] INFO: Closing spider (finished)
2016-06-14 14:18:39 [scrapy] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 351,
'downloader/request_count': 1,
'downloader/request_method_count/GET': 1,
'downloader/response_bytes': 420,
'downloader/response_count': 1,
'downloader/response_status_count/302': 1,
'finish_reason': 'finished',
'finish_time': datetime.datetime(2016, 6, 14, 6, 18, 39, 56461),
'log_count/DEBUG': 2,
'log_count/ERROR': 1,
'log_count/INFO': 7,
'log_count/WARNING': 2,
'response_received_count': 1,
'scheduler/dequeued': 1,
'scheduler/dequeued/memory': 1,
'scheduler/enqueued': 1,
'scheduler/enqueued/memory': 1,
'spider_exceptions/AttributeError': 1,
'start_time': datetime.datetime(2016, 6, 14, 6, 18, 38, 437336)}
2016-06-14 14:18:39 [scrapy] INFO: Spider closed (finished)

When you add handle_httpstatus_list = [302, 301] you're telling Scrapy to call your callback even for HTTP redirections, instead of letting the framework handle the redirection transparently for you (which is the default).
Some HTTP redirect responses have no body and no content headers, so in those cases Scrapy hands your callback the response as-is, i.e. a plain Response object rather than an HtmlResponse, which is why the .xpath() and .css() shortcuts are missing.
Either you really need to handle HTTP 301 and 302 responses yourself, in which case your callback must test the status code (response.status) and only extract data for non-3xx responses,
or you let Scrapy handle HTTP redirections for you, in which case remove handle_httpstatus_list from your spider.
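For the first option, here is a minimal sketch of what the status check could look like in parse_item. The XPath is copied from the question; following the Location header manually is just one possible way to deal with the redirect, since the listed status codes bypass RedirectMiddleware:
def parse_item(self, response):
    if response.status in (301, 302):
        # Redirect responses usually carry the target URL in the Location header;
        # follow it by hand because RedirectMiddleware no longer does it for these codes.
        location = response.headers.get('Location')
        if location:
            yield scrapy.Request(response.urljoin(location), callback=self.parse_item, dont_filter=True)
        return
    # A normal HTML response: .xpath() and .css() are available again here.
    for tr in response.xpath("//div[@class='view-grid-overflow']/a"):
        ...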

Related

Stop Scrapy from logging spider output to Visual Studio Code terminal

Whenever I run my spider scrapy crawl test -O test.json in my Visual Studio Code terminal I get output like this:
2023-01-31 14:31:45 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.example.com/product/1
{'price': 100,
'newprice': 90
}
2023-01-31 14:31:50 [scrapy.core.engine] INFO: Closing spider (finished)
2023-01-31 14:31:50 [scrapy.extensions.feedexport] INFO: Stored json feed (251 items) in: test.json
2023-01-31 14:31:50 [selenium.webdriver.remote.remote_connection] DEBUG: DELETE http://localhost:61169/session/996866d968ab791730e4f6d87ce2a1ea {}
2023-01-31 14:31:50 [urllib3.connectionpool] DEBUG: http://localhost:61169 "DELETE /session/996866d968ab791730e4f6d87ce2a1ea HTTP/1.1" 200 14
2023-01-31 14:31:50 [selenium.webdriver.remote.remote_connection] DEBUG: Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
2023-01-31 14:31:50 [selenium.webdriver.remote.remote_connection] DEBUG: Finished Request
2023-01-31 14:31:52 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 91321,
'downloader/request_count': 267,
'downloader/request_method_count/GET': 267,
'downloader/response_bytes': 2730055,
'downloader/response_count': 267,
'downloader/response_status_count/200': 267,
'dupefilter/filtered': 121,
'elapsed_time_seconds': 11.580893,
'feedexport/success_count/FileFeedStorage': 1,
'finish_reason': 'finished',
'finish_time': datetime.datetime(2023, 1, 31, 13, 31, 50, 495392),
'httpcompression/response_bytes': 9718676,
'httpcompression/response_count': 267,
'item_scraped_count': 251,
'log_count/DEBUG': 537,
'log_count/INFO': 11,
'request_depth_max': 2,
'response_received_count': 267,
'scheduler/dequeued': 267,
'scheduler/dequeued/memory': 267,
'scheduler/enqueued': 267,
'scheduler/enqueued/memory': 267,
'start_time': datetime.datetime(2023, 1, 31, 13, 31, 38, 914499)}
2023-01-31 14:31:52 [scrapy.core.engine] INFO: Spider closed (finished)
I want to log all this, including the print('hi') lines in my Spider but I DON'T want the spider output logged, in this case {'price': 100, 'newprice': 90 }.
Inspecting the above I think I need to disable only the downloader/response_bytes.
I've been reading https://docs.scrapy.org/en/latest/topics/logging.html, but I'm not sure where or how to configure my exact use case. I have hundreds of spiders and I don't want to add a configuration to each one, but rather apply the logging config to all spiders. Do I need to add a separate config file, or add it to an existing one like scrapy.cfg?
UPDATE 1
So here's my folder structure where I created settings.py:
Scrapy\
  tt_spiders\
    myspiders\
      spider1.py
      spider2.py
    settings.py
    middlewares.py
    pipelines.py
    settings.py
  scrapy.cfg
  settings.py
settings.py:
if __name__ == "__main__":
    disable_list = ['scrapy.core.engine', 'scrapy.core.scraper', 'scrapy.spiders']
    for element in disable_list:
        logger = logging.getLogger(element)
        logger.disabled = True

    spider = 'example_spider'
    settings = get_project_settings()
    settings['USER_AGENT'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
    process = CrawlerProcess(settings)
    process.crawl(spider)
    process.start()
This throws 3 errors, which makes sense, as I have not defined these:
"logging" is not defined
"get_project_settings" is not defined
"CrawlerProcess" is not defined
But more importantly, what I don't understand is that this code contains spider = 'example_spider',
whereas I want this logic to apply to ALL spiders.
So I reduced it to:
if __name__ == "__main__":
    disable_list = ['scrapy.core.scraper']
But still the output is logged. What am I missing?
Let's assume that we have this spider:
spider.py:
import scrapy

class ExampleSpider(scrapy.Spider):
    name = 'example_spider'
    allowed_domains = ['scrapingclub.com']
    start_urls = ['https://scrapingclub.com/exercise/detail_basic/']

    def parse(self, response):
        item = dict()
        item['title'] = response.xpath('//h3/text()').get()
        item['price'] = response.xpath('//div[@class="card-body"]/h4/text()').get()
        yield item
And its output is:
...
[scrapy.middleware] INFO: Enabled item pipelines:
[]
[scrapy.core.engine] INFO: Spider opened
[scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
[scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6023
[scrapy.core.engine] DEBUG: Crawled (200) <GET https://scrapingclub.com/exercise/detail_basic/> (referer: None)
[scrapy.core.scraper] DEBUG: Scraped from <200 https://scrapingclub.com/exercise/detail_basic/>
{'title': 'Long-sleeved Jersey Top', 'price': '$12.99'}
[scrapy.core.engine] INFO: Closing spider (finished)
[scrapy.statscollectors] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 329,
'downloader/request_count': 1,
...
If you want to disable logging for a specific line, just copy the text inside the square brackets and disable its logger,
e.g. scrapy.core.scraper for: [scrapy.core.scraper] DEBUG: Scraped from <200 https://scrapingclub.com/exercise/detail_basic/>.
main.py:
if __name__ == "__main__":
    disable_list = ['scrapy.core.engine', 'scrapy.core.scraper', 'scrapy.spiders']
    for element in disable_list:
        logger = logging.getLogger(element)
        logger.disabled = True

    spider = 'example_spider'
    settings = get_project_settings()
    settings['USER_AGENT'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
    process = CrawlerProcess(settings)
    process.crawl(spider)
    process.start()
If you want to disable some of the extensions you can set them to None in settings.py:
EXTENSIONS = {
    'scrapy.extensions.telnet': None,
    'scrapy.extensions.logstats.LogStats': None,
    'scrapy.extensions.corestats.CoreStats': None
}
Update 1:
Add just this to settings.py:
import logging

disable_list = ['scrapy.core.engine', 'scrapy.core.scraper', 'scrapy.spiders']
for element in disable_list:
    logger = logging.getLogger(element)
    logger.disabled = True

Foswiki plugin on Perl/FCGI fails to use File::Find on the 5th attempt

I am writing a REST plugin for Foswiki using Perl and I am facing a reliability issue when using File::Find. I have tried my best to write a minimal reproducible example. The plugin uses File::Find to traverse directories and print the filenames in the HTTP response. The REST request works properly 4 times, but stops working the 5th time. The HTTP status remains "HTTP/1.1 200 OK", but no file is reported by File::Find anymore.
The web server is nginx and is configured to use FastCGI. It appears to run 4 worker processes managed by foswiki-fcgi-pm:
> ps aux
www-data 16957 0.0 7.7 83412 78332 ? Ss 16:52 0:00 foswiki-fcgi-pm
www-data 16960 0.0 7.5 83960 76740 ? S 16:52 0:00 foswiki-fcgi
www-data 16961 0.0 7.6 84004 76828 ? S 16:52 0:00 foswiki-fcgi
www-data 16962 0.0 7.6 83956 76844 ? S 16:52 0:00 foswiki-fcgi
www-data 16963 0.0 7.5 83960 76740 ? S 16:52 0:00 foswiki-fcgi
Firstly, the plugin initialization simply registers the REST handler:
sub initPlugin {
    my ( $topic, $web, $user, $installWeb ) = @_;

    # check for Plugins.pm versions
    if ( $Foswiki::Plugins::VERSION < 2.3 ) {
        Foswiki::Func::writeWarning( 'Version mismatch between ',
            __PACKAGE__, ' and Plugins.pm' );
        return 0;
    }

    Foswiki::Func::registerRESTHandler(
        'restbug', \&RestBug,
        authenticate => 0,          # Set to 0 if handler should be useable by WikiGuest
        validate     => 0,          # Set to 0 to disable StrikeOne CSRF protection
        http_allow   => 'GET,POST', # Set to 'GET,POST' to allow use HTTP GET and POST
        description  => 'Debug'
    );

    # Plugin correctly initialized
    return 1;
}
Secondly, the REST handler is implemented as follows, printing all the files it can possibly find:
sub RestBug {
    my ($session, $subject, $verb, $response) = @_;
    my @Directories = ("/var/www/foswiki/tools");

    sub findfilestest {
        $response->print("FILE $_\n");
    }

    find({ wanted => \&findfilestest }, @Directories );
}
When I test the REST service with an HTTP request, the first 4 times I get the following HTTP response, which seems quite satisfactory:
HTTP/1.1 200 OK
Server: nginx/1.14.2
Date: Tue, 22 Nov 2022 09:23:10 GMT
Content-Length: 541
Connection: keep-alive
Set-Cookie: SFOSWIKISID=385db599c5d66bb19591e1eef7f1a854; path=/; secure; HttpOnly
FILE .
FILE foswiki.freebsd.init-script
FILE bulk_copy.pl
FILE dependencies
FILE mod_perl_startup.pl
FILE geturl.pl
FILE extender.pl
FILE extension_installer
FILE configure
FILE lighttpd.pl
FILE foswiki.freebsd.etc-defaults
FILE save-pending-checkins
FILE babelify
FILE upgrade_emails.pl
FILE tick_foswiki.pl
FILE foswiki.defaults
FILE rewriteshebang.pl
FILE fix_file_permissions.sh
FILE foswiki.init-script
FILE convertTopicSettings.pl
FILE mailnotify
FILE html2tml.pl
FILE tml2html.pl
FILE systemd
FILE foswiki.service
The following attempts give this unexpected response:
HTTP/1.1 200 OK
Server: nginx/1.14.2
Date: Tue, 22 Nov 2022 09:24:56 GMT
Transfer-Encoding: chunked
Connection: keep-alive
Set-Cookie: SFOSWIKISID=724b2c4b1ddfbebd25d0dc2a0f182142; path=/; secure; HttpOnly
Note that if I restart Foswiki with the command systemctl restart foswiki, the REST service works again 4 more times.
How to make this REST service work more than 4 times in a row?

Unable to upload an image file to Slack through hubot

I am trying to get an image of a web page using the pageres package and post the image to Slack using hubot. I am able to get the image, but for some reason I am not able to post it to Slack using the Slack upload API. Here is my code; can you tell me what could be wrong? (It is not a CoffeeScript lint issue.)
fs = require("fs")
Pageres = require('pageres')
util = require("util")
request = require("request")

module.exports = (robot) ->
  robot.respond /screenshot page (\S*)?( at )?(\S*)?/i, (msg) ->
    pageres = new Pageres({delay: 30})
    domain = msg.match[1].replace("http://", "")
    if msg.match[3] == undefined
      size = '960x1024'
    else
      size = msg.match[3]
    dest = './screenshots'
    msg.send "Acquiring screenshot of #{domain}"
    pageres.src(domain, [size]).dest(dest)
    pageres.run (err) ->
      if err
        robot.logger.error err
        msg.send "Um..., you better check the log"
      else
        opts = {
          method: 'POST',
          uri: 'https://slack.com/api/files.upload',
          formData: {
            channels: process.env.HUBOT_SCREENSHOT_SLACK_CHANNEL,
            initial_comment: "Screenshot of #{domain}",
            token: process.env.HUBOT_SLACK_TOKEN,
            file: fs.createReadStream("#{dest}/#{domain}.png")
          }
        }
        request.post opts, (error, response, body) ->
          if error
            robot.logger.error error
          else
            robot.logger.debug 'screenshot posted to slack'
    return
The bot is connected to Slack and receiving messages from it, parsing them fine and saving the image to the local destination, but it is not able to post the image to Slack. There are no errors in the log either.
[Wed Apr 11 2018 16:16:47 GMT+0000 (UTC)] DEBUG Received message: '@hubot screenshot page http://www.google.com' in channel: ****, from: ******
[Wed Apr 11 2018 16:16:47 GMT+0000 (UTC)] DEBUG Message '@hubot screenshot page http://www.google.com' matched regex //^\s*[@]?hubot[:,]?\s*(?:screenshot page (\S*)?( at )?(\S*)?)/i/; listener.options = { id: null }
[Wed Apr 11 2018 16:16:47 GMT+0000 (UTC)] DEBUG Executing listener callback for Message '@hubot screenshot page http://www.google.com'
[Wed Apr 11 2018 16:16:47 GMT+0000 (UTC)] DEBUG Sending to *****: Acquiring screenshot of www.google.com
You can use the curl command, invoked via child_process, to upload a file to the channel:
curl -F file=@dramacat.gif -F channels=C024BE91L,#general -F token=xxxx-xxxxxxxxx-xxxx https://slack.com/api/files.upload
It seems the formData property in your opts variable should be slightly different like this:
formData: {
  token: process.env.HUBOT_SLACK_TOKEN,
  title: "Screenshot of #{domain}",
  filename: "image.png",
  filetype: "auto",
  channels: channel_id,
  file: fs.createReadStream("path_to_your_image"),
}
The channel_id is your Slack channel ID, which you can see in the browser address bar when you access the channel.
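If you want to rule out token or channel problems independently of hubot, a small Python sketch against the same files.upload endpoint can help. The file path here is a placeholder, and it assumes the requests library plus the same environment variables as the bot:
import os
import requests

resp = requests.post(
    'https://slack.com/api/files.upload',
    data={
        'token': os.environ['HUBOT_SLACK_TOKEN'],
        'channels': os.environ['HUBOT_SCREENSHOT_SLACK_CHANNEL'],
        'title': 'Screenshot test',
    },
    # 'screenshot.png' is a placeholder; use any local image to test the upload.
    files={'file': open('screenshot.png', 'rb')},
)
# The Slack API responds with JSON; check the 'ok' flag and any 'error' field.
print(resp.json())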

Pycuda test_driver.py raises Attribute Error

I'm trying to install pycuda on Linux Mint with a GeForce 960M and Cuda 8.0 installed. When I run the test_driver.py script it outputs the following error:
============================= test session starts ==============================
platform linux2 -- Python 2.7.12, pytest-3.0.3, py-1.4.31, pluggy-0.4.0
rootdir: /home/milton/Downloads/pycuda-2016.1.2, inifile:
collected 28 items
test_driver.py ...................x.....F..
=================================== FAILURES ===================================
________________________ TestDriver.test_multi_context _________________________
args = (,), kwargs = {}
pycuda = <module 'pycuda' from '/home/milton/miniconda2/lib/python2.7/site-packages/pycuda-2016.1.2-py2.7-linux-x86_64.egg/pycuda/__init__.pyc'>
ctx = <pycuda._driver.Context object at 0x7f540e39d758>
clear_context_caches = <function clear_context_caches at 0x7f540ee26758>
collect = <built-in function collect>

    def f(*args, **kwargs):
        import pycuda.driver
        # appears to be idempotent, i.e. no harm in calling it more than once
        pycuda.driver.init()
        ctx = make_default_context()
        try:
            assert isinstance(ctx.get_device().name(), str)
            assert isinstance(ctx.get_device().compute_capability(), tuple)
            assert isinstance(ctx.get_device().get_attributes(), dict)
            inner_f(*args, **kwargs)
../../../miniconda2/lib/python2.7/site-packages/pycuda-2016.1.2-py2.7-linux-x86_64.egg/pycuda/tools.py:460:

self = <test_driver.TestDriver instance at 0x7f540c21fc20>

    @mark_cuda_test
    def test_multi_context(self):
        if drv.get_version() < (2,0,0):
            return
        if drv.get_version() >= (2,2,0):
            if drv.Context.get_device().compute_mode == drv.compute_mode.EXCLUSIVE:
E           AttributeError: type object 'compute_mode' has no attribute 'EXCLUSIVE'
test_driver.py:638: AttributeError
================ 1 failed, 26 passed, 1 xfailed in 6.92 seconds ================
The PyCUDA driver's compute_mode only supports the following modes:
DEFAULT,
PROHIBITED,
EXCLUSIVE_PROCESS
so please change this:
if drv.Context.get_device().compute_mode == drv.compute_mode.EXCLUSIVE:
to
if drv.Context.get_device().compute_mode == drv.compute_mode.EXCLUSIVE_PROCESS:
in your test_driver.py file
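If you want to confirm which modes your PyCUDA build actually exposes before editing the test, a quick check along these lines should work (a small sketch; pycuda.autoinit is used only to get a context the same way the test accesses the device):
import pycuda.driver as drv
import pycuda.autoinit  # initializes the driver and creates a context on the first device

# Names available on the compute_mode enum in this build
print([m for m in dir(drv.compute_mode) if not m.startswith('_')])
# The mode reported for the current device, mirroring the test's own access pattern
print(drv.Context.get_device().compute_mode)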

500 internal server error on certain page after a few hours

I am getting a 500 Internal Server Error on a certain page of my site after a few hours of being up. I restart the uWSGI instance with uwsgi --ini /home/metheuser/webapps/ers_portal/ers_portal_uwsgi.ini and it works again for a few hours.
The rest of the site seems to be working. When I navigate to my_table, I am directed to the login page, but I get the 500 error on my table page after login. I followed the instructions here to set up my nginx and uWSGI configs.
That is, I have ers_portal_nginx.conf located in my app folder and symlinked to /etc/nginx/conf.d/. I start my uWSGI "instance" (not sure what exactly to call it) in a screen session as mentioned above, with the .ini file located in my app folder.
My ers_portal_nginx.conf:
server {
    listen 80;
    server_name www.mydomain.com;

    location / { try_files $uri @app; }

    location @app {
        include uwsgi_params;
        uwsgi_pass unix:/home/metheuser/webapps/ers_portal/run_web_uwsgi.sock;
    }
}
My ers_portal_uwsgi.ini:
[uwsgi]
#user info
uid = metheuser
gid = ers_group
#application's base folder
base = /home/metheuser/webapps/ers_portal
#python module to import
app = run_web
module = %(app)
home = %(base)/ers_portal_venv
pythonpath = %(base)
#socket file's location
socket = /home/metheuser/webapps/ers_portal/%n.sock
#permissions for the socket file
chmod-socket = 666
#uwsgi variable only, does not relate to your flask application
callable = app
#location of log files
logto = /home/metheuser/webapps/ers_portal/logs/%n.log
Relevant parts of my views.py
data_modification_time = None
data = None

def reload_data():
    global data_modification_time, data, sites, column_names
    filename = '/home/metheuser/webapps/ers_portal/app/static/' + ec.dd_filename
    mtime = os.stat(filename).st_mtime
    if data_modification_time != mtime:
        data_modification_time = mtime
        with open(filename) as f:
            data = pickle.load(f)
    return data

# a bunch of authentication stuff...

@app.route('/')
@app.route('/index')
def index():
    return render_template("index.html",
                           title = 'Main',)

@app.route('/login', methods = ['GET', 'POST'])
def login():
    login stuff...

@app.route('/my_table')
@login_required
def my_table():
    print 'trying to access data table...'
    data = reload_data()
    return render_template("my_table.html",
                           title = "Rundata Viewer",
                           sts = sites,
                           cn = column_names,
                           data = data)  # dictionary of data
I installed nginx via yum as described here (yesterday)
I am using uWSGI installed in my venv via pip
I am on CentOS 6
My uwsgi log shows:
Wed Jun 11 17:20:01 2014 - uwsgi_response_writev_headers_and_body_do(): Broken pipe [core/writer.c line 287] during GET /whm-server-status (127.0.0.1)
IOError: write error
[pid: 9586|app: 0|req: 135/135] 127.0.0.1 () {24 vars in 292 bytes} [Wed Jun 11 17:20:01 2014] GET /whm-server-status => generated 0 bytes in 3 msecs (HTTP/1.0 404) 2 headers in 0 bytes (0 switches on core 0)
When it's working, the print statement in the "my_table" view prints into the log file, but not once it stops working.
Any ideas?