by
Changes to 10 files · Browse files at fe177af16c32 Showing diff from parent 7001fe04f499 6b390b445b06 Diff from another changeset...
@@ -5,3 +5,7 @@ 3294042cf555116765f8a23ec96619b02575cc22 0.9.5
45180a34d6b2df2f157b19ff44d4b5b6c51a5485 0.9.5
6779c9cb8a2a2e7cbfc1e6fb42c5972eb63f18d5 0.9.6
+1234dd4a7748ebd9b8355ff4eb717b278475e121 0.9.5-20140516
+fef0f3af20820f2c35d4ccfd90eef0ce5f4039e3 0.9.5-20140522
+ce9989f282e77f6d0c84de50eb80a0534b62395a 0.9.5-20140603
+b937588cfa55c91b9e5c5d17c59075593a0b70d0 0.9.5.post20140603
|
This file's diff was not loaded because this changeset is very large. Load changes Loading... |
|
|
@@ -1,898 +1,0 @@ - # -*- coding: utf-8 -*-
-"""Unit tests for Beautiful Soup.
-
-These tests make sure the Beautiful Soup works as it should. If you
-find a bug in Beautiful Soup, the best way to express it is as a test
-case like this that fails."""
-
-import unittest
-from BeautifulSoup import *
-
-class SoupTest(unittest.TestCase):
-
- def assertSoupEquals(self, toParse, rep=None, c=BeautifulSoup):
- """Parse the given text and make sure its string rep is the other
- given text."""
- if rep == None:
- rep = toParse
- self.assertEqual(str(c(toParse)), rep)
-
-
-class FollowThatTag(SoupTest):
-
- "Tests the various ways of fetching tags from a soup."
-
- def setUp(self):
- ml = """
- <a id="x">1</a>
- <A id="a">2</a>
- <b id="b">3</a>
- <b href="foo" id="x">4</a>
- <ac width=100>4</ac>"""
- self.soup = BeautifulStoneSoup(ml)
-
- def testFindAllByName(self):
- matching = self.soup('a')
- self.assertEqual(len(matching), 2)
- self.assertEqual(matching[0].name, 'a')
- self.assertEqual(matching, self.soup.findAll('a'))
- self.assertEqual(matching, self.soup.findAll(SoupStrainer('a')))
-
- def testFindAllByAttribute(self):
- matching = self.soup.findAll(id='x')
- self.assertEqual(len(matching), 2)
- self.assertEqual(matching[0].name, 'a')
- self.assertEqual(matching[1].name, 'b')
-
- matching2 = self.soup.findAll(attrs={'id' : 'x'})
- self.assertEqual(matching, matching2)
-
- strainer = SoupStrainer(attrs={'id' : 'x'})
- self.assertEqual(matching, self.soup.findAll(strainer))
-
- self.assertEqual(len(self.soup.findAll(id=None)), 1)
-
- self.assertEqual(len(self.soup.findAll(width=100)), 1)
- self.assertEqual(len(self.soup.findAll(junk=None)), 5)
- self.assertEqual(len(self.soup.findAll(junk=[1, None])), 5)
-
- self.assertEqual(len(self.soup.findAll(junk=re.compile('.*'))), 0)
- self.assertEqual(len(self.soup.findAll(junk=True)), 0)
-
- self.assertEqual(len(self.soup.findAll(junk=True)), 0)
- self.assertEqual(len(self.soup.findAll(href=True)), 1)
-
- def testFindallByClass(self):
- soup = BeautifulSoup('<b class="foo">Foo</b><a class="1 23 4">Bar</a>')
- self.assertEqual(soup.find(attrs='foo').string, "Foo")
- self.assertEqual(soup.find('a', '1').string, "Bar")
- self.assertEqual(soup.find('a', '23').string, "Bar")
- self.assertEqual(soup.find('a', '4').string, "Bar")
-
- self.assertEqual(soup.find('a', '2'), None)
-
- def testFindAllByList(self):
- matching = self.soup(['a', 'ac'])
- self.assertEqual(len(matching), 3)
-
- def testFindAllByHash(self):
- matching = self.soup({'a' : True, 'b' : True})
- self.assertEqual(len(matching), 4)
-
- def testFindAllText(self):
- soup = BeautifulSoup("<html>\xbb</html>")
- self.assertEqual(soup.findAll(text=re.compile('.*')),
- [u'\xbb'])
-
- def testFindAllByRE(self):
- import re
- r = re.compile('a.*')
- self.assertEqual(len(self.soup(r)), 3)
-
- def testFindAllByMethod(self):
- def matchTagWhereIDMatchesName(tag):
- return tag.name == tag.get('id')
-
- matching = self.soup.findAll(matchTagWhereIDMatchesName)
- self.assertEqual(len(matching), 2)
- self.assertEqual(matching[0].name, 'a')
-
- def testFindByIndex(self):
- """For when you have the tag and you want to know where it is."""
- tag = self.soup.find('a', id="a")
- self.assertEqual(self.soup.index(tag), 3)
-
- # It works for NavigableStrings as well.
- s = tag.string
- self.assertEqual(tag.index(s), 0)
-
- # If the tag isn't present, a ValueError is raised.
- soup2 = BeautifulSoup("<b></b>")
- tag2 = soup2.find('b')
- self.assertRaises(ValueError, self.soup.index, tag2)
-
- def testConflictingFindArguments(self):
- """The 'text' argument takes precedence."""
- soup = BeautifulSoup('Foo<b>Bar</b>Baz')
- self.assertEqual(soup.find('b', text='Baz'), 'Baz')
- self.assertEqual(soup.findAll('b', text='Baz'), ['Baz'])
-
- self.assertEqual(soup.find(True, text='Baz'), 'Baz')
- self.assertEqual(soup.findAll(True, text='Baz'), ['Baz'])
-
- def testParents(self):
- soup = BeautifulSoup('<ul id="foo"></ul><ul id="foo"><ul><ul id="foo" a="b"><b>Blah')
- b = soup.b
- self.assertEquals(len(b.findParents('ul', {'id' : 'foo'})), 2)
- self.assertEquals(b.findParent('ul')['a'], 'b')
-
- PROXIMITY_TEST = BeautifulSoup('<b id="1"><b id="2"><b id="3"><b id="4">')
-
- def testNext(self):
- soup = self.PROXIMITY_TEST
- b = soup.find('b', {'id' : 2})
- self.assertEquals(b.findNext('b')['id'], '3')
- self.assertEquals(b.findNext('b')['id'], '3')
- self.assertEquals(len(b.findAllNext('b')), 2)
- self.assertEquals(len(b.findAllNext('b', {'id' : 4})), 1)
-
- def testPrevious(self):
- soup = self.PROXIMITY_TEST
- b = soup.find('b', {'id' : 3})
- self.assertEquals(b.findPrevious('b')['id'], '2')
- self.assertEquals(b.findPrevious('b')['id'], '2')
- self.assertEquals(len(b.findAllPrevious('b')), 2)
- self.assertEquals(len(b.findAllPrevious('b', {'id' : 2})), 1)
-
-
- SIBLING_TEST = BeautifulSoup('<blockquote id="1"><blockquote id="1.1"></blockquote></blockquote><blockquote id="2"><blockquote id="2.1"></blockquote></blockquote><blockquote id="3"><blockquote id="3.1"></blockquote></blockquote><blockquote id="4">')
-
- def testNextSibling(self):
- soup = self.SIBLING_TEST
- tag = 'blockquote'
- b = soup.find(tag, {'id' : 2})
- self.assertEquals(b.findNext(tag)['id'], '2.1')
- self.assertEquals(b.findNextSibling(tag)['id'], '3')
- self.assertEquals(b.findNextSibling(tag)['id'], '3')
- self.assertEquals(len(b.findNextSiblings(tag)), 2)
- self.assertEquals(len(b.findNextSiblings(tag, {'id' : 4})), 1)
-
- def testPreviousSibling(self):
- soup = self.SIBLING_TEST
- tag = 'blockquote'
- b = soup.find(tag, {'id' : 3})
- self.assertEquals(b.findPrevious(tag)['id'], '2.1')
- self.assertEquals(b.findPreviousSibling(tag)['id'], '2')
- self.assertEquals(b.findPreviousSibling(tag)['id'], '2')
- self.assertEquals(len(b.findPreviousSiblings(tag)), 2)
- self.assertEquals(len(b.findPreviousSiblings(tag, id=1)), 1)
-
- def testTextNavigation(self):
- soup = BeautifulSoup('Foo<b>Bar</b><i id="1"><b>Baz<br />Blee<hr id="1"/></b></i>Blargh')
- baz = soup.find(text='Baz')
- self.assertEquals(baz.findParent("i")['id'], '1')
- self.assertEquals(baz.findNext(text='Blee'), 'Blee')
- self.assertEquals(baz.findNextSibling(text='Blee'), 'Blee')
- self.assertEquals(baz.findNextSibling(text='Blargh'), None)
- self.assertEquals(baz.findNextSibling('hr')['id'], '1')
-
-class SiblingRivalry(SoupTest):
- "Tests the nextSibling and previousSibling navigation."
-
- def testSiblings(self):
- soup = BeautifulSoup("<ul><li>1<p>A</p>B<li>2<li>3</ul>")
- secondLI = soup.find('li').nextSibling
- self.assert_(secondLI.name == 'li' and secondLI.string == '2')
- self.assertEquals(soup.find(text='1').nextSibling.name, 'p')
- self.assertEquals(soup.find('p').nextSibling, 'B')
- self.assertEquals(soup.find('p').nextSibling.previousSibling.nextSibling, 'B')
-
-class TagsAreObjectsToo(SoupTest):
- "Tests the various built-in functions of Tag objects."
-
- def testLen(self):
- soup = BeautifulSoup("<top>1<b>2</b>3</top>")
- self.assertEquals(len(soup.top), 3)
-
-class StringEmUp(SoupTest):
- "Tests the use of 'string' as an alias for a tag's only content."
-
- def testString(self):
- s = BeautifulSoup("<b>foo</b>")
- self.assertEquals(s.b.string, 'foo')
-
- def testLackOfString(self):
- s = BeautifulSoup("<b>f<i>e</i>o</b>")
- self.assert_(not s.b.string)
-
- def testStringAssign(self):
- s = BeautifulSoup("<b></b>")
- b = s.b
- b.string = "foo"
- string = b.string
- self.assertEquals(string, "foo")
- self.assert_(isinstance(string, NavigableString))
-
-class AllText(SoupTest):
- "Tests the use of 'text' to get all of string content from the tag."
-
- def testText(self):
- soup = BeautifulSoup("<ul><li>spam</li><li>eggs</li><li>cheese</li>")
- self.assertEquals(soup.ul.text, "spameggscheese")
- self.assertEquals(soup.ul.getText('/'), "spam/eggs/cheese")
-
-class ThatsMyLimit(SoupTest):
- "Tests the limit argument."
-
- def testBasicLimits(self):
- s = BeautifulSoup('<br id="1" /><br id="1" /><br id="1" /><br id="1" />')
- self.assertEquals(len(s.findAll('br')), 4)
- self.assertEquals(len(s.findAll('br', limit=2)), 2)
- self.assertEquals(len(s('br', limit=2)), 2)
-
-class OnlyTheLonely(SoupTest):
- "Tests the parseOnly argument to the constructor."
- def setUp(self):
- x = []
- for i in range(1,6):
- x.append('<a id="%s">' % i)
- for j in range(100,103):
- x.append('<b id="%s.%s">Content %s.%s</b>' % (i,j, i,j))
- x.append('</a>')
- self.x = ''.join(x)
-
- def testOnly(self):
- strainer = SoupStrainer("b")
- soup = BeautifulSoup(self.x, parseOnlyThese=strainer)
- self.assertEquals(len(soup), 15)
-
- strainer = SoupStrainer(id=re.compile("100.*"))
- soup = BeautifulSoup(self.x, parseOnlyThese=strainer)
- self.assertEquals(len(soup), 5)
-
- strainer = SoupStrainer(text=re.compile("10[01].*"))
- soup = BeautifulSoup(self.x, parseOnlyThese=strainer)
- self.assertEquals(len(soup), 10)
-
- strainer = SoupStrainer(text=lambda(x):x[8]=='3')
- soup = BeautifulSoup(self.x, parseOnlyThese=strainer)
- self.assertEquals(len(soup), 3)
-
-class PickleMeThis(SoupTest):
- "Testing features like pickle and deepcopy."
-
- def setUp(self):
- self.page = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
-"http://www.w3.org/TR/REC-html40/transitional.dtd">
-<html>
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-<title>Beautiful Soup: We called him Tortoise because he taught us.</title>
-<link rev="made" href="mailto:leonardr@segfault.org">
-<meta name="Description" content="Beautiful Soup: an HTML parser optimized for screen-scraping.">
-<meta name="generator" content="Markov Approximation 1.4 (module: leonardr)">
-<meta name="author" content="Leonard Richardson">
-</head>
-<body>
-<a href="foo">foo</a>
-<a href="foo"><b>bar</b></a>
-</body>
-</html>"""
-
- self.soup = BeautifulSoup(self.page)
-
- def testPickle(self):
- import pickle
- dumped = pickle.dumps(self.soup, 2)
- loaded = pickle.loads(dumped)
- self.assertEqual(loaded.__class__, BeautifulSoup)
- self.assertEqual(str(loaded), str(self.soup))
-
- def testDeepcopy(self):
- from copy import deepcopy
- copied = deepcopy(self.soup)
- self.assertEqual(str(copied), str(self.soup))
-
- def testUnicodePickle(self):
- import cPickle as pickle
- html = "<b>" + chr(0xc3) + "</b>"
- soup = BeautifulSoup(html)
- dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL)
- loaded = pickle.loads(dumped)
- self.assertEqual(str(loaded), str(soup))
-
-
-class WriteOnlyCode(SoupTest):
- "Testing the modification of the tree."
-
- def testModifyAttributes(self):
- soup = BeautifulSoup('<a id="1"></a>')
- soup.a['id'] = 2
- self.assertEqual(soup.renderContents(), '<a id="2"></a>')
- del(soup.a['id'])
- self.assertEqual(soup.renderContents(), '<a></a>')
- soup.a['id2'] = 'foo'
- self.assertEqual(soup.renderContents(), '<a id2="foo"></a>')
-
- def testNewTagCreation(self):
- "Makes sure tags don't step on each others' toes."
- soup = BeautifulSoup()
- a = Tag(soup, 'a')
- ol = Tag(soup, 'ol')
- a['href'] = 'http://foo.com/'
- self.assertRaises(KeyError, lambda : ol['href'])
-
- def testNewTagWithAttributes(self):
- """Makes sure new tags can be created complete with attributes."""
- soup = BeautifulSoup()
- a = Tag(soup, 'a', [('href', 'foo')])
- b = Tag(soup, 'b', {'class':'bar'})
- soup.insert(0,a)
- soup.insert(1,b)
- self.assertEqual(soup.a['href'], 'foo')
- self.assertEqual(soup.b['class'], 'bar')
-
- def testTagReplacement(self):
- # Make sure you can replace an element with itself.
- text = "<a><b></b><c>Foo<d></d></c></a><a><e></e></a>"
- soup = BeautifulSoup(text)
- c = soup.c
- soup.c.replaceWith(c)
- self.assertEquals(str(soup), text)
-
- # A very simple case
- soup = BeautifulSoup("<b>Argh!</b>")
- soup.find(text="Argh!").replaceWith("Hooray!")
- newText = soup.find(text="Hooray!")
- b = soup.b
- self.assertEqual(newText.previous, b)
- self.assertEqual(newText.parent, b)
- self.assertEqual(newText.previous.next, newText)
- self.assertEqual(newText.next, None)
-
- # A more complex case
- soup = BeautifulSoup("<a><b>Argh!</b><c></c><d></d></a>")
- soup.b.insert(1, "Hooray!")
- newText = soup.find(text="Hooray!")
- self.assertEqual(newText.previous, "Argh!")
- self.assertEqual(newText.previous.next, newText)
-
- self.assertEqual(newText.previousSibling, "Argh!")
- self.assertEqual(newText.previousSibling.nextSibling, newText)
-
- self.assertEqual(newText.nextSibling, None)
- self.assertEqual(newText.next, soup.c)
-
- text = "<html>There's <b>no</b> business like <b>show</b> business</html>"
- soup = BeautifulSoup(text)
- no, show = soup.findAll('b')
- show.replaceWith(no)
- self.assertEquals(str(soup), "<html>There's business like <b>no</b> business</html>")
-
- # Even more complex
- soup = BeautifulSoup("<a><b>Find</b><c>lady!</c><d></d></a>")
- tag = Tag(soup, 'magictag')
- tag.insert(0, "the")
- soup.a.insert(1, tag)
-
- b = soup.b
- c = soup.c
- theText = tag.find(text=True)
- findText = b.find(text="Find")
-
- self.assertEqual(findText.next, tag)
- self.assertEqual(tag.previous, findText)
- self.assertEqual(b.nextSibling, tag)
- self.assertEqual(tag.previousSibling, b)
- self.assertEqual(tag.nextSibling, c)
- self.assertEqual(c.previousSibling, tag)
-
- self.assertEqual(theText.next, c)
- self.assertEqual(c.previous, theText)
-
- # Aand... incredibly complex.
- soup = BeautifulSoup("""<a>We<b>reserve<c>the</c><d>right</d></b></a><e>to<f>refuse</f><g>service</g></e>""")
- f = soup.f
- a = soup.a
- c = soup.c
- e = soup.e
- weText = a.find(text="We")
- soup.b.replaceWith(soup.f)
- self.assertEqual(str(soup), "<a>We<f>refuse</f></a><e>to<g>service</g></e>")
-
- self.assertEqual(f.previous, weText)
- self.assertEqual(weText.next, f)
- self.assertEqual(f.previousSibling, weText)
- self.assertEqual(f.nextSibling, None)
- self.assertEqual(weText.nextSibling, f)
-
- def testReplaceWithChildren(self):
- soup = BeautifulStoneSoup(
- "<top><replace><child1/><child2/></replace></top>",
- selfClosingTags=["child1", "child2"])
- soup.replaceTag.replaceWithChildren()
- self.assertEqual(soup.top.contents[0].name, "child1")
- self.assertEqual(soup.top.contents[1].name, "child2")
-
- def testAppend(self):
- doc = "<p>Don't leave me <b>here</b>.</p> <p>Don't leave me.</p>"
- soup = BeautifulSoup(doc)
- second_para = soup('p')[1]
- bold = soup.find('b')
- soup('p')[1].append(soup.find('b'))
- self.assertEqual(bold.parent, second_para)
- self.assertEqual(str(soup),
- "<p>Don't leave me .</p> "
- "<p>Don't leave me.<b>here</b></p>")
-
- def testTagExtraction(self):
- # A very simple case
- text = '<html><div id="nav">Nav crap</div>Real content here.</html>'
- soup = BeautifulSoup(text)
- extracted = soup.find("div", id="nav").extract()
- self.assertEqual(str(soup), "<html>Real content here.</html>")
- self.assertEqual(str(extracted), '<div id="nav">Nav crap</div>')
-
- # A simple case, a more complex test.
- text = "<doc><a>1<b>2</b></a><a>i<b>ii</b></a><a>A<b>B</b></a></doc>"
- soup = BeautifulStoneSoup(text)
- doc = soup.doc
- numbers, roman, letters = soup("a")
-
- self.assertEqual(roman.parent, doc)
- oldPrevious = roman.previous
- endOfThisTag = roman.nextSibling.previous
- self.assertEqual(oldPrevious, "2")
- self.assertEqual(roman.next, "i")
- self.assertEqual(endOfThisTag, "ii")
- self.assertEqual(roman.previousSibling, numbers)
- self.assertEqual(roman.nextSibling, letters)
-
- roman.extract()
- self.assertEqual(roman.parent, None)
- self.assertEqual(roman.previous, None)
- self.assertEqual(roman.next, "i")
- self.assertEqual(letters.previous, '2')
- self.assertEqual(roman.previousSibling, None)
- self.assertEqual(roman.nextSibling, None)
- self.assertEqual(endOfThisTag.next, None)
- self.assertEqual(roman.b.contents[0].next, None)
- self.assertEqual(numbers.nextSibling, letters)
- self.assertEqual(letters.previousSibling, numbers)
- self.assertEqual(len(doc.contents), 2)
- self.assertEqual(doc.contents[0], numbers)
- self.assertEqual(doc.contents[1], letters)
-
- # A more complex case.
- text = "<a>1<b>2<c>Hollywood, baby!</c></b></a>3"
- soup = BeautifulStoneSoup(text)
- one = soup.find(text="1")
- three = soup.find(text="3")
- toExtract = soup.b
- soup.b.extract()
- self.assertEqual(one.next, three)
- self.assertEqual(three.previous, one)
- self.assertEqual(one.parent.nextSibling, three)
- self.assertEqual(three.previousSibling, soup.a)
-
- def testClear(self):
- soup = BeautifulSoup("<ul><li></li><li></li></ul>")
- soup.ul.clear()
- self.assertEqual(len(soup.ul.contents), 0)
-
-class TheManWithoutAttributes(SoupTest):
- "Test attribute access"
-
- def testHasKey(self):
- text = "<foo attr='bar'>"
- self.assertEquals(BeautifulSoup(text).foo.has_key('attr'), True)
-
-class QuoteMeOnThat(SoupTest):
- "Test quoting"
- def testQuotedAttributeValues(self):
- self.assertSoupEquals("<foo attr='bar'></foo>",
- '<foo attr="bar"></foo>')
-
- text = """<foo attr='bar "brawls" happen'>a</foo>"""
- soup = BeautifulSoup(text)
- self.assertEquals(soup.renderContents(), text)
-
- soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
- newText = """<foo attr='Brawls happen at "Bob&squot;s Bar"'>a</foo>"""
- self.assertSoupEquals(soup.renderContents(), newText)
-
- self.assertSoupEquals('<this is="really messed up & stuff">',
- '<this is="really messed up & stuff"></this>')
-
- # This is not what the original author had in mind, but it's
- # a legitimate interpretation of what they wrote.
- self.assertSoupEquals("""<a href="foo</a>, </a><a href="bar">baz</a>""",
- '<a href="foo</a>, </a><a href="></a>, <a href="bar">baz</a>')
-
- # SGMLParser generates bogus parse events when attribute values
- # contain embedded brackets, but at least Beautiful Soup fixes
- # it up a little.
- self.assertSoupEquals('<a b="<a>">', '<a b="<a>"></a><a>"></a>')
- self.assertSoupEquals('<a href="http://foo.com/<a> and blah and blah',
- """<a href='"http://foo.com/'></a><a> and blah and blah</a>""")
-
-
-
-class YoureSoLiteral(SoupTest):
- "Test literal mode."
- def testLiteralMode(self):
- text = "<script>if (i<imgs.length)</script><b>Foo</b>"
- soup = BeautifulSoup(text)
- self.assertEqual(soup.script.contents[0], "if (i<imgs.length)")
- self.assertEqual(soup.b.contents[0], "Foo")
-
- def testTextArea(self):
- text = "<textarea><b>This is an example of an HTML tag</b><&<&</textarea>"
- soup = BeautifulSoup(text)
- self.assertEqual(soup.textarea.contents[0],
- "<b>This is an example of an HTML tag</b><&<&")
-
-class OperatorOverload(SoupTest):
- "Our operators do it all! Call now!"
-
- def testTagNameAsFind(self):
- "Tests that referencing a tag name as a member delegates to find()."
- soup = BeautifulSoup('<b id="1">foo<i>bar</i></b><b>Red herring</b>')
- self.assertEqual(soup.b.i, soup.find('b').find('i'))
- self.assertEqual(soup.b.i.string, 'bar')
- self.assertEqual(soup.b['id'], '1')
- self.assertEqual(soup.b.contents[0], 'foo')
- self.assert_(not soup.a)
-
- #Test the .fooTag variant of .foo.
- self.assertEqual(soup.bTag.iTag.string, 'bar')
- self.assertEqual(soup.b.iTag.string, 'bar')
- self.assertEqual(soup.find('b').find('i'), soup.bTag.iTag)
-
-class NestableEgg(SoupTest):
- """Here we test tag nesting. TEST THE NEST, DUDE! X-TREME!"""
-
- def testParaInsideBlockquote(self):
- soup = BeautifulSoup('<blockquote><p><b>Foo</blockquote><p>Bar')
- self.assertEqual(soup.blockquote.p.b.string, 'Foo')
- self.assertEqual(soup.blockquote.b.string, 'Foo')
- self.assertEqual(soup.find('p', recursive=False).string, 'Bar')
-
- def testNestedTables(self):
- text = """<table id="1"><tr><td>Here's another table:
- <table id="2"><tr><td>Juicy text</td></tr></table></td></tr></table>"""
- soup = BeautifulSoup(text)
- self.assertEquals(soup.table.table.td.string, 'Juicy text')
- self.assertEquals(len(soup.findAll('table')), 2)
- self.assertEquals(len(soup.table.findAll('table')), 1)
- self.assertEquals(soup.find('table', {'id' : 2}).parent.parent.parent.name,
- 'table')
-
- text = "<table><tr><td><div><table>Foo</table></div></td></tr></table>"
- soup = BeautifulSoup(text)
- self.assertEquals(soup.table.tr.td.div.table.contents[0], "Foo")
-
- text = """<table><thead><tr>Foo</tr></thead><tbody><tr>Bar</tr></tbody>
- <tfoot><tr>Baz</tr></tfoot></table>"""
- soup = BeautifulSoup(text)
- self.assertEquals(soup.table.thead.tr.contents[0], "Foo")
-
- def testBadNestedTables(self):
- soup = BeautifulSoup("<table><tr><table><tr id='nested'>")
- self.assertEquals(soup.table.tr.table.tr['id'], 'nested')
-
-class CleanupOnAisleFour(SoupTest):
- """Here we test cleanup of text that breaks SGMLParser or is just
- obnoxious."""
-
- def testSelfClosingtag(self):
- self.assertEqual(str(BeautifulSoup("Foo<br/>Bar").find('br')),
- '<br />')
-
- self.assertSoupEquals('<p>test1<br/>test2</p>',
- '<p>test1<br />test2</p>')
-
- text = '<p>test1<selfclosing>test2'
- soup = BeautifulStoneSoup(text)
- self.assertEqual(str(soup),
- '<p>test1<selfclosing>test2</selfclosing></p>')
-
- soup = BeautifulStoneSoup(text, selfClosingTags='selfclosing')
- self.assertEqual(str(soup),
- '<p>test1<selfclosing />test2</p>')
-
- def testSelfClosingTagOrNot(self):
- text = "<item><link>http://foo.com/</link></item>"
- self.assertEqual(BeautifulStoneSoup(text).renderContents(), text)
- self.assertEqual(BeautifulSoup(text).renderContents(),
- '<item><link />http://foo.com/</item>')
-
- def testCData(self):
- xml = "<root>foo<![CDATA[foobar]]>bar</root>"
- self.assertSoupEquals(xml, xml)
- r = re.compile("foo.*bar")
- soup = BeautifulSoup(xml)
- self.assertEquals(soup.find(text=r).string, "foobar")
- self.assertEquals(soup.find(text=r).__class__, CData)
-
- def testComments(self):
- xml = "foo<!--foobar-->baz"
- self.assertSoupEquals(xml)
- r = re.compile("foo.*bar")
- soup = BeautifulSoup(xml)
- self.assertEquals(soup.find(text=r).string, "foobar")
- self.assertEquals(soup.find(text="foobar").__class__, Comment)
-
- def testDeclaration(self):
- xml = "foo<!DOCTYPE foobar>baz"
- self.assertSoupEquals(xml)
- r = re.compile(".*foo.*bar")
- soup = BeautifulSoup(xml)
- text = "DOCTYPE foobar"
- self.assertEquals(soup.find(text=r).string, text)
- self.assertEquals(soup.find(text=text).__class__, Declaration)
-
- namespaced_doctype = ('<!DOCTYPE xsl:stylesheet SYSTEM "htmlent.dtd">'
- '<html>foo</html>')
- soup = BeautifulSoup(namespaced_doctype)
- self.assertEquals(soup.contents[0],
- 'DOCTYPE xsl:stylesheet SYSTEM "htmlent.dtd"')
- self.assertEquals(soup.html.contents[0], 'foo')
-
- def testEntityConversions(self):
- text = "<<sacré bleu!>>"
- soup = BeautifulStoneSoup(text)
- self.assertSoupEquals(text)
-
- xmlEnt = BeautifulStoneSoup.XML_ENTITIES
- htmlEnt = BeautifulStoneSoup.HTML_ENTITIES
- xhtmlEnt = BeautifulStoneSoup.XHTML_ENTITIES
-
- soup = BeautifulStoneSoup(text, convertEntities=xmlEnt)
- self.assertEquals(str(soup), "<<sacré bleu!>>")
-
- soup = BeautifulStoneSoup(text, convertEntities=xmlEnt)
- self.assertEquals(str(soup), "<<sacré bleu!>>")
-
- soup = BeautifulStoneSoup(text, convertEntities=htmlEnt)
- self.assertEquals(unicode(soup), u"<<sacr\xe9 bleu!>>")
-
- # Make sure the "XML", "HTML", and "XHTML" settings work.
- text = "<™'"
- soup = BeautifulStoneSoup(text, convertEntities=xmlEnt)
- self.assertEquals(unicode(soup), u"<™'")
-
- soup = BeautifulStoneSoup(text, convertEntities=htmlEnt)
- self.assertEquals(unicode(soup), u"<\u2122'")
-
- soup = BeautifulStoneSoup(text, convertEntities=xhtmlEnt)
- self.assertEquals(unicode(soup), u"<\u2122'")
-
- invalidEntity = "foo&#bar;baz"
- soup = BeautifulStoneSoup\
- (invalidEntity,
- convertEntities=htmlEnt)
- self.assertEquals(str(soup), invalidEntity)
-
- def testNonBreakingSpaces(self):
- soup = BeautifulSoup("<a> </a>",
- convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
- self.assertEquals(unicode(soup), u"<a>\xa0\xa0</a>")
-
- def testWhitespaceInDeclaration(self):
- self.assertSoupEquals('<! DOCTYPE>', '<!DOCTYPE>')
-
- def testJunkInDeclaration(self):
- self.assertSoupEquals('<! Foo = -8>a', '<!Foo = -8>a')
-
- def testIncompleteDeclaration(self):
- self.assertSoupEquals('a<!b <p>c')
-
- def testEntityReplacement(self):
- self.assertSoupEquals('<b>hello there</b>')
-
- def testEntitiesInAttributeValues(self):
- self.assertSoupEquals('<x t="xñ">', '<x t="x\xc3\xb1"></x>')
- self.assertSoupEquals('<x t="xñ">', '<x t="x\xc3\xb1"></x>')
-
- soup = BeautifulSoup('<x t=">™">',
- convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
- self.assertEquals(unicode(soup), u'<x t=">\u2122"></x>')
-
- uri = "http://crummy.com?sacré&bleu"
- link = '<a href="%s"></a>' % uri
- soup = BeautifulSoup(link)
- self.assertEquals(unicode(soup), link)
- #self.assertEquals(unicode(soup.a['href']), uri)
-
- soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES)
- self.assertEquals(unicode(soup),
- link.replace("é", u"\xe9"))
-
- uri = "http://crummy.com?sacré&bleu"
- link = '<a href="%s"></a>' % uri
- soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES)
- self.assertEquals(unicode(soup.a['href']),
- uri.replace("é", u"\xe9"))
-
- def testNakedAmpersands(self):
- html = {'convertEntities':BeautifulStoneSoup.HTML_ENTITIES}
- soup = BeautifulStoneSoup("AT&T ", **html)
- self.assertEquals(str(soup), 'AT&T ')
-
- nakedAmpersandInASentence = "AT&T was Ma Bell"
- soup = BeautifulStoneSoup(nakedAmpersandInASentence,**html)
- self.assertEquals(str(soup), \
- nakedAmpersandInASentence.replace('&','&'))
-
- invalidURL = '<a href="http://example.org?a=1&b=2;3">foo</a>'
- validURL = invalidURL.replace('&','&')
- soup = BeautifulStoneSoup(invalidURL)
- self.assertEquals(str(soup), validURL)
-
- soup = BeautifulStoneSoup(validURL)
- self.assertEquals(str(soup), validURL)
-
-
-class EncodeRed(SoupTest):
- """Tests encoding conversion, Unicode conversion, and Microsoft
- smart quote fixes."""
-
- def testUnicodeDammitStandalone(self):
- markup = "<foo>\x92</foo>"
- dammit = UnicodeDammit(markup)
- self.assertEquals(dammit.unicode, "<foo>’</foo>")
-
- hebrew = "\xed\xe5\xec\xf9"
- dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
- self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9')
- self.assertEquals(dammit.originalEncoding, 'iso-8859-8')
-
- def testGarbageInGarbageOut(self):
- ascii = "<foo>a</foo>"
- asciiSoup = BeautifulStoneSoup(ascii)
- self.assertEquals(ascii, str(asciiSoup))
-
- unicodeData = u"<foo>\u00FC</foo>"
- utf8 = unicodeData.encode("utf-8")
- self.assertEquals(utf8, '<foo>\xc3\xbc</foo>')
-
- unicodeSoup = BeautifulStoneSoup(unicodeData)
- self.assertEquals(unicodeData, unicode(unicodeSoup))
- self.assertEquals(unicode(unicodeSoup.foo.string), u'\u00FC')
-
- utf8Soup = BeautifulStoneSoup(utf8, fromEncoding='utf-8')
- self.assertEquals(utf8, str(utf8Soup))
- self.assertEquals(utf8Soup.originalEncoding, "utf-8")
-
- utf8Soup = BeautifulStoneSoup(unicodeData)
- self.assertEquals(utf8, str(utf8Soup))
- self.assertEquals(utf8Soup.originalEncoding, None)
-
-
- def testHandleInvalidCodec(self):
- for bad_encoding in ['.utf8', '...', 'utF---16.!']:
- soup = BeautifulSoup("Räksmörgås", fromEncoding=bad_encoding)
- self.assertEquals(soup.originalEncoding, 'utf-8')
-
- def testUnicodeSearch(self):
- html = u'<html><body><h1>Räksmörgås</h1></body></html>'
- soup = BeautifulSoup(html)
- self.assertEqual(soup.find(text=u'Räksmörgås'),u'Räksmörgås')
-
- def testRewrittenXMLHeader(self):
- euc_jp = '<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'
- utf8 = "<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"
- soup = BeautifulStoneSoup(euc_jp)
- if soup.originalEncoding != "euc-jp":
- raise Exception("Test failed when parsing euc-jp document. "
- "If you're running Python >=2.4, or you have "
- "cjkcodecs installed, this is a real problem. "
- "Otherwise, ignore it.")
-
- self.assertEquals(soup.originalEncoding, "euc-jp")
- self.assertEquals(str(soup), utf8)
-
- old_text = "<?xml encoding='windows-1252'><foo>\x92</foo>"
- new_text = "<?xml version='1.0' encoding='utf-8'?><foo>’</foo>"
- self.assertSoupEquals(old_text, new_text)
-
- def testRewrittenMetaTag(self):
- no_shift_jis_html = '''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''
- soup = BeautifulSoup(no_shift_jis_html)
-
- # Beautiful Soup used to try to rewrite the meta tag even if the
- # meta tag got filtered out by the strainer. This test makes
- # sure that doesn't happen.
- strainer = SoupStrainer('pre')
- soup = BeautifulSoup(no_shift_jis_html, parseOnlyThese=strainer)
- self.assertEquals(soup.contents[0].name, 'pre')
-
- meta_tag = ('<meta content="text/html; charset=x-sjis" '
- 'http-equiv="Content-type" />')
- shift_jis_html = (
- '<html><head>\n%s\n'
- '<meta http-equiv="Content-language" content="ja" />'
- '</head><body><pre>\n'
- '\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
- '\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
- '\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'
- '</pre></body></html>') % meta_tag
- soup = BeautifulSoup(shift_jis_html)
- if soup.originalEncoding != "shift-jis":
- raise Exception("Test failed when parsing shift-jis document "
- "with meta tag '%s'."
- "If you're running Python >=2.4, or you have "
- "cjkcodecs installed, this is a real problem. "
- "Otherwise, ignore it." % meta_tag)
- self.assertEquals(soup.originalEncoding, "shift-jis")
-
- content_type_tag = soup.meta['content']
- self.assertEquals(content_type_tag[content_type_tag.find('charset='):],
- 'charset=%SOUP-ENCODING%')
- content_type = str(soup.meta)
- index = content_type.find('charset=')
- self.assertEqual(content_type[index:index+len('charset=utf8')+1],
- 'charset=utf-8')
- content_type = soup.meta.__str__('shift-jis')
- index = content_type.find('charset=')
- self.assertEqual(content_type[index:index+len('charset=shift-jis')],
- 'charset=shift-jis')
-
- self.assertEquals(str(soup), (
- '<html><head>\n'
- '<meta content="text/html; charset=utf-8" '
- 'http-equiv="Content-type" />\n'
- '<meta http-equiv="Content-language" content="ja" />'
- '</head><body><pre>\n'
- '\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'
- '\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'
- '\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'
- '\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'
- '\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'
- '</pre></body></html>'))
- self.assertEquals(soup.renderContents("shift-jis"),
- shift_jis_html.replace('x-sjis', 'shift-jis'))
-
- isolatin ="""<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
- soup = BeautifulSoup(isolatin)
- self.assertSoupEquals(soup.__str__("utf-8"),
- isolatin.replace("ISO-Latin-1", "utf-8").replace("\xe9", "\xc3\xa9"))
-
- def testHebrew(self):
- iso_8859_8= '<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'
- utf8 = '<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'
- soup = BeautifulStoneSoup(iso_8859_8, fromEncoding="iso-8859-8")
- self.assertEquals(str(soup), utf8)
-
- def testSmartQuotesNotSoSmartAnymore(self):
- self.assertSoupEquals("\x91Foo\x92 <!--blah-->",
- '‘Foo’ <!--blah-->')
-
- def testDontConvertSmartQuotesWhenAlsoConvertingEntities(self):
- smartQuotes = "Il a dit, \x8BSacré bleu!\x9b"
- soup = BeautifulSoup(smartQuotes)
- self.assertEquals(str(soup),
- 'Il a dit, ‹Sacré bleu!›')
- soup = BeautifulSoup(smartQuotes, convertEntities="html")
- self.assertEquals(str(soup),
- 'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')
-
- def testDontSeeSmartQuotesWhereThereAreNone(self):
- utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
- self.assertSoupEquals(utf_8)
-
-
-class Whitewash(SoupTest):
- """Test whitespace preservation."""
-
- def testPreservedWhitespace(self):
- self.assertSoupEquals("<pre> </pre>")
- self.assertSoupEquals("<pre> woo </pre>")
-
- def testCollapsedWhitespace(self):
- self.assertSoupEquals("<p> </p>", "<p> </p>")
-
-
-if __name__ == '__main__':
- unittest.main()
|
|
@@ -1,19 +1,0 @@ - Metadata-Version: 1.0
-Name: BeautifulSoup
-Version: 3.2.0
-Summary: HTML/XML parser for quick-turnaround applications like screen-scraping.
-Home-page: http://www.crummy.com/software/BeautifulSoup/
-Author: Leonard Richardson
-Author-email: leonardr@segfault.org
-License: BSD
-Download-URL: http://www.crummy.com/software/BeautifulSoup/download/
-Description: Beautiful Soup parses arbitrarily invalid SGML and provides a variety of methods and Pythonic idioms for iterating and searching the parse tree.
-Platform: UNKNOWN
-Classifier: Development Status :: 5 - Production/Stable
-Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: Python Software Foundation License
-Classifier: Programming Language :: Python
-Classifier: Topic :: Text Processing :: Markup :: HTML
-Classifier: Topic :: Text Processing :: Markup :: XML
-Classifier: Topic :: Text Processing :: Markup :: SGML
-Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
|
@@ -1,60 +1,0 @@ - from distutils.core import setup
-import unittest
-import warnings
-warnings.filterwarnings("ignore", "Unknown distribution option")
-
-import sys
-# patch distutils if it can't cope with the "classifiers" keyword
-if sys.version < '2.2.3':
- from distutils.dist import DistributionMetadata
- DistributionMetadata.classifiers = None
- DistributionMetadata.download_url = None
-
-from BeautifulSoup import __version__
-
-#Make sure all the tests complete.
-import BeautifulSoupTests
-loader = unittest.TestLoader()
-result = unittest.TestResult()
-suite = loader.loadTestsFromModule(BeautifulSoupTests)
-suite.run(result)
-if not result.wasSuccessful():
- print "Unit tests have failed!"
- for l in result.errors, result.failures:
- for case, error in l:
- print "-" * 80
- desc = case.shortDescription()
- if desc:
- print desc
- print error
- print '''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?'''
- print "This might or might not be a problem depending on what you plan to do with\nBeautiful Soup."
- if sys.argv[1] == 'sdist':
- print
- print "I'm not going to make a source distribution since the tests don't pass."
- sys.exit(1)
-
-setup(name="BeautifulSoup",
- version=__version__,
- py_modules=['BeautifulSoup', 'BeautifulSoupTests'],
- description="HTML/XML parser for quick-turnaround applications like screen-scraping.",
- author="Leonard Richardson",
- author_email = "leonardr@segfault.org",
- long_description="""Beautiful Soup parses arbitrarily invalid SGML and provides a variety of methods and Pythonic idioms for iterating and searching the parse tree.""",
- classifiers=["Development Status :: 5 - Production/Stable",
- "Intended Audience :: Developers",
- "License :: OSI Approved :: Python Software Foundation License",
- "Programming Language :: Python",
- "Topic :: Text Processing :: Markup :: HTML",
- "Topic :: Text Processing :: Markup :: XML",
- "Topic :: Text Processing :: Markup :: SGML",
- "Topic :: Software Development :: Libraries :: Python Modules",
- ],
- url="http://www.crummy.com/software/BeautifulSoup/",
- license="BSD",
- download_url="http://www.crummy.com/software/BeautifulSoup/download/"
- )
-
- # Send announce to:
- # python-announce@python.org
- # python-list@python.org
|
|
|
@@ -1,284 +1,0 @@ - #!python
-"""Bootstrap setuptools installation
-
-If you want to use setuptools in your package's setup.py, just include this
-file in the same directory with it, and add this to the top of your setup.py::
-
- from ez_setup import use_setuptools
- use_setuptools()
-
-If you want to require a specific version of setuptools, set a download
-mirror, or use an alternate download directory, you can do so by supplying
-the appropriate options to ``use_setuptools()``.
-
-This file can also be run as a script to install or upgrade setuptools.
-"""
-import sys
-DEFAULT_VERSION = "0.6c11"
-DEFAULT_URL = "http://pypi.python.org/packages/%s/s/setuptools/" % sys.version[:3]
-
-md5_data = {
- 'setuptools-0.6b1-py2.3.egg': '8822caf901250d848b996b7f25c6e6ca',
- 'setuptools-0.6b1-py2.4.egg': 'b79a8a403e4502fbb85ee3f1941735cb',
- 'setuptools-0.6b2-py2.3.egg': '5657759d8a6d8fc44070a9d07272d99b',
- 'setuptools-0.6b2-py2.4.egg': '4996a8d169d2be661fa32a6e52e4f82a',
- 'setuptools-0.6b3-py2.3.egg': 'bb31c0fc7399a63579975cad9f5a0618',
- 'setuptools-0.6b3-py2.4.egg': '38a8c6b3d6ecd22247f179f7da669fac',
- 'setuptools-0.6b4-py2.3.egg': '62045a24ed4e1ebc77fe039aa4e6f7e5',
- 'setuptools-0.6b4-py2.4.egg': '4cb2a185d228dacffb2d17f103b3b1c4',
- 'setuptools-0.6c1-py2.3.egg': 'b3f2b5539d65cb7f74ad79127f1a908c',
- 'setuptools-0.6c1-py2.4.egg': 'b45adeda0667d2d2ffe14009364f2a4b',
- 'setuptools-0.6c10-py2.3.egg': 'ce1e2ab5d3a0256456d9fc13800a7090',
- 'setuptools-0.6c10-py2.4.egg': '57d6d9d6e9b80772c59a53a8433a5dd4',
- 'setuptools-0.6c10-py2.5.egg': 'de46ac8b1c97c895572e5e8596aeb8c7',
- 'setuptools-0.6c10-py2.6.egg': '58ea40aef06da02ce641495523a0b7f5',
- 'setuptools-0.6c11-py2.3.egg': '2baeac6e13d414a9d28e7ba5b5a596de',
- 'setuptools-0.6c11-py2.4.egg': 'bd639f9b0eac4c42497034dec2ec0c2b',
- 'setuptools-0.6c11-py2.5.egg': '64c94f3bf7a72a13ec83e0b24f2749b2',
- 'setuptools-0.6c11-py2.6.egg': 'bfa92100bd772d5a213eedd356d64086',
- 'setuptools-0.6c2-py2.3.egg': 'f0064bf6aa2b7d0f3ba0b43f20817c27',
- 'setuptools-0.6c2-py2.4.egg': '616192eec35f47e8ea16cd6a122b7277',
- 'setuptools-0.6c3-py2.3.egg': 'f181fa125dfe85a259c9cd6f1d7b78fa',
- 'setuptools-0.6c3-py2.4.egg': 'e0ed74682c998bfb73bf803a50e7b71e',
- 'setuptools-0.6c3-py2.5.egg': 'abef16fdd61955514841c7c6bd98965e',
- 'setuptools-0.6c4-py2.3.egg': 'b0b9131acab32022bfac7f44c5d7971f',
- 'setuptools-0.6c4-py2.4.egg': '2a1f9656d4fbf3c97bf946c0a124e6e2',
- 'setuptools-0.6c4-py2.5.egg': '8f5a052e32cdb9c72bcf4b5526f28afc',
- 'setuptools-0.6c5-py2.3.egg': 'ee9fd80965da04f2f3e6b3576e9d8167',
- 'setuptools-0.6c5-py2.4.egg': 'afe2adf1c01701ee841761f5bcd8aa64',
- 'setuptools-0.6c5-py2.5.egg': 'a8d3f61494ccaa8714dfed37bccd3d5d',
- 'setuptools-0.6c6-py2.3.egg': '35686b78116a668847237b69d549ec20',
- 'setuptools-0.6c6-py2.4.egg': '3c56af57be3225019260a644430065ab',
- 'setuptools-0.6c6-py2.5.egg': 'b2f8a7520709a5b34f80946de5f02f53',
- 'setuptools-0.6c7-py2.3.egg': '209fdf9adc3a615e5115b725658e13e2',
- 'setuptools-0.6c7-py2.4.egg': '5a8f954807d46a0fb67cf1f26c55a82e',
- 'setuptools-0.6c7-py2.5.egg': '45d2ad28f9750e7434111fde831e8372',
- 'setuptools-0.6c8-py2.3.egg': '50759d29b349db8cfd807ba8303f1902',
- 'setuptools-0.6c8-py2.4.egg': 'cba38d74f7d483c06e9daa6070cce6de',
- 'setuptools-0.6c8-py2.5.egg': '1721747ee329dc150590a58b3e1ac95b',
- 'setuptools-0.6c9-py2.3.egg': 'a83c4020414807b496e4cfbe08507c03',
- 'setuptools-0.6c9-py2.4.egg': '260a2be2e5388d66bdaee06abec6342a',
- 'setuptools-0.6c9-py2.5.egg': 'fe67c3e5a17b12c0e7c541b7ea43a8e6',
- 'setuptools-0.6c9-py2.6.egg': 'ca37b1ff16fa2ede6e19383e7b59245a',
-}
-
-import sys, os
-try: from hashlib import md5
-except ImportError: from md5 import md5
-
-def _validate_md5(egg_name, data):
- if egg_name in md5_data:
- digest = md5(data).hexdigest()
- if digest != md5_data[egg_name]:
- print >>sys.stderr, (
- "md5 validation of %s failed! (Possible download problem?)"
- % egg_name
- )
- sys.exit(2)
- return data
-
-def use_setuptools(
- version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir,
- download_delay=15
-):
- """Automatically find/download setuptools and make it available on sys.path
-
- `version` should be a valid setuptools version number that is available
- as an egg for download under the `download_base` URL (which should end with
- a '/'). `to_dir` is the directory where setuptools will be downloaded, if
- it is not already available. If `download_delay` is specified, it should
- be the number of seconds that will be paused before initiating a download,
- should one be required. If an older version of setuptools is installed,
- this routine will print a message to ``sys.stderr`` and raise SystemExit in
- an attempt to abort the calling script.
- """
- was_imported = 'pkg_resources' in sys.modules or 'setuptools' in sys.modules
- def do_download():
- egg = download_setuptools(version, download_base, to_dir, download_delay)
- sys.path.insert(0, egg)
- import setuptools; setuptools.bootstrap_install_from = egg
- try:
- import pkg_resources
- except ImportError:
- return do_download()
- try:
- pkg_resources.require("setuptools>="+version); return
- except pkg_resources.VersionConflict, e:
- if was_imported:
- print >>sys.stderr, (
- "The required version of setuptools (>=%s) is not available, and\n"
- "can't be installed while this script is running. Please install\n"
- " a more recent version first, using 'easy_install -U setuptools'."
- "\n\n(Currently using %r)"
- ) % (version, e.args[0])
- sys.exit(2)
- except pkg_resources.DistributionNotFound:
- pass
-
- del pkg_resources, sys.modules['pkg_resources'] # reload ok
- return do_download()
-
-def download_setuptools(
- version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir,
- delay = 15
-):
- """Download setuptools from a specified location and return its filename
-
- `version` should be a valid setuptools version number that is available
- as an egg for download under the `download_base` URL (which should end
- with a '/'). `to_dir` is the directory where the egg will be downloaded.
- `delay` is the number of seconds to pause before an actual download attempt.
- """
- import urllib2, shutil
- egg_name = "setuptools-%s-py%s.egg" % (version,sys.version[:3])
- url = download_base + egg_name
- saveto = os.path.join(to_dir, egg_name)
- src = dst = None
- if not os.path.exists(saveto): # Avoid repeated downloads
- try:
- from distutils import log
- if delay:
- log.warn("""
----------------------------------------------------------------------------
-This script requires setuptools version %s to run (even to display
-help). I will attempt to download it for you (from
-%s), but
-you may need to enable firewall access for this script first.
-I will start the download in %d seconds.
-
-(Note: if this machine does not have network access, please obtain the file
-
- %s
-
-and place it in this directory before rerunning this script.)
----------------------------------------------------------------------------""",
- version, download_base, delay, url
- ); from time import sleep; sleep(delay)
- log.warn("Downloading %s", url)
- src = urllib2.urlopen(url)
- # Read/write all in one block, so we don't create a corrupt file
- # if the download is interrupted.
- data = _validate_md5(egg_name, src.read())
- dst = open(saveto,"wb"); dst.write(data)
- finally:
- if src: src.close()
- if dst: dst.close()
- return os.path.realpath(saveto)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-def main(argv, version=DEFAULT_VERSION):
- """Install or upgrade setuptools and EasyInstall"""
- try:
- import setuptools
- except ImportError:
- egg = None
- try:
- egg = download_setuptools(version, delay=0)
- sys.path.insert(0,egg)
- from setuptools.command.easy_install import main
- return main(list(argv)+[egg]) # we're done here
- finally:
- if egg and os.path.exists(egg):
- os.unlink(egg)
- else:
- if setuptools.__version__ == '0.0.1':
- print >>sys.stderr, (
- "You have an obsolete version of setuptools installed. Please\n"
- "remove it from your system entirely before rerunning this script."
- )
- sys.exit(2)
-
- req = "setuptools>="+version
- import pkg_resources
- try:
- pkg_resources.require(req)
- except pkg_resources.VersionConflict:
- try:
- from setuptools.command.easy_install import main
- except ImportError:
- from easy_install import main
- main(list(argv)+[download_setuptools(delay=0)])
- sys.exit(0) # try to force an exit
- else:
- if argv:
- from setuptools.command.easy_install import main
- main(argv)
- else:
- print "Setuptools version",version,"or greater has been installed."
- print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)'
-
-def update_md5(filenames):
- """Update our built-in md5 registry"""
-
- import re
-
- for name in filenames:
- base = os.path.basename(name)
- f = open(name,'rb')
- md5_data[base] = md5(f.read()).hexdigest()
- f.close()
-
- data = [" %r: %r,\n" % it for it in md5_data.items()]
- data.sort()
- repl = "".join(data)
-
- import inspect
- srcfile = inspect.getsourcefile(sys.modules[__name__])
- f = open(srcfile, 'rb'); src = f.read(); f.close()
-
- match = re.search("\nmd5_data = {\n([^}]+)}", src)
- if not match:
- print >>sys.stderr, "Internal error!"
- sys.exit(2)
-
- src = src[:match.start(1)] + repl + src[match.end(1):]
- f = open(srcfile,'w')
- f.write(src)
- f.close()
-
-
-if __name__=='__main__':
- if len(sys.argv)>2 and sys.argv[1]=='--md5update':
- update_md5(sys.argv[2:])
- else:
- main(sys.argv[1:])
-
-
-
-
-
-
|
|
|
@@ -1,8 +1,23 @@ - import urllib2
-import mimetools
-from StringIO import StringIO
+import sys
+try:
+ from email.generator import _make_boundary
+except ImportError:
+ from mimetools import choose_boundary as _make_boundary
+try:
+ import urllib.request as urllib_request
+except ImportError:
+ import urllib2 as urllib_request
+try:
+ from io import BytesIO
+except ImportError:
+ from StringIO import StringIO as BytesIO
-from BeautifulSoup import BeautifulSoup, CData
+try:
+ basestring
+except NameError:
+ basestring = str
+
+from bs4 import BeautifulSoup, CData
DEBUG = False # Set to True for debugging output.
@@ -22,14 +37,15 @@ url += '/'
if token:
- self._token = token.encode('utf-8')
+ self._token = token
else:
self._token = None
- self._opener = urllib2.build_opener()
+ self._opener = urllib_request.build_opener()
try:
soup = BeautifulSoup(self._opener.open(url + 'api.xml'))
- except (urllib2.URLError, urllib2.HTTPError), e:
+ except (urllib_request.URLError, urllib_request.HTTPError):
+ e = sys.exc_info()[1]
raise FogBugzConnectionError("Library could not connect to the FogBugz API. Either this installation of FogBugz does not support the API, or the url, %s, is incorrect.\n\nError: %s" % (self._url, e))
self._url = url + soup.response.url.string
self.currentFilter = None
@@ -44,13 +60,14 @@ self.logoff()
try:
response = self.__makerequest('logon', email=username, password=password)
- except FogBugzAPIError, e:
+ except FogBugzAPIError:
+ e = sys.exc_info()[1]
raise FogBugzLogonError(e)
-
+
self._token = response.token.string
if type(self._token) == CData:
self._token = self._token.encode('utf-8')
-
+
def logoff(self):
"""
Logs off the current user.
@@ -62,7 +79,7 @@ """
Set the token without actually logging on. More secure.
"""
- self._token = token.encode('utf-8')
+ self._token = token
def __encode_multipart_formdata(self, fields, files):
"""
@@ -70,28 +87,46 @@ files is a sequence of (filename, filehandle) files to be uploaded
returns (content_type, body)
"""
- BOUNDARY = mimetools.choose_boundary()
+ BOUNDARY = _make_boundary()
if len(files) > 0:
fields['nFileCount'] = str(len(files))
crlf = '\r\n'
- buf = StringIO()
+ buf = BytesIO()
for k, v in fields.items():
if DEBUG:
print("field: %s: %s"% (repr(k), repr(v)))
- buf.write(crlf.join([ '--' + BOUNDARY, 'Content-disposition: form-data; name="%s"' % k, '', str(v), '' ]))
-
+ lines = [
+ '--' + BOUNDARY,
+ 'Content-disposition: form-data; name="%s"' % k,
+ '',
+ str(v),
+ '',
+ ]
+ buf.write(crlf.join(lines).encode('utf-8'))
+
n = 0
for f, h in files.items():
n += 1
- buf.write(crlf.join([ '--' + BOUNDARY, 'Content-disposition: form-data; name="File%d"; filename="%s"' % ( n, f), '' ]))
- buf.write(crlf.join([ 'Content-type: application/octet-stream', '', '' ]))
- buf.write(h.read())
- buf.write(crlf)
-
- buf.write('--' + BOUNDARY + '--' + crlf)
+ lines = [
+ '--' + BOUNDARY,
+ 'Content-disposition: form-data; name="File%d"; '
+ 'filename="%s"' % (n, f),
+ '',
+ ]
+ buf.write(crlf.join(lines).encode('utf-8'))
+ lines = [
+ 'Content-type: application/octet-stream',
+ '',
+ '',
+ ]
+ buf.write(crlf.join(lines).encode('utf-8'))
+ buf.write(h.read().encode('utf-8'))
+ buf.write(crlf.encode('utf-8'))
+
+ buf.write(('--' + BOUNDARY + '--' + crlf).encode('utf-8'))
content_type = "multipart/form-data; boundary=%s" % BOUNDARY
return content_type, buf.getvalue()
@@ -99,23 +134,31 @@ kwargs["cmd"] = cmd
if self._token:
kwargs["token"] = self._token
-
- fields = dict([k, v.encode('utf-8') if isinstance(v,basestring) else v] for k, v in kwargs.items())
+
+ fields = kwargs
files = fields.get('Files', {})
if 'Files' in fields:
del fields['Files']
-
+
content_type, body = self.__encode_multipart_formdata(fields, files)
+ if DEBUG:
+ print(body)
headers = { 'Content-Type': content_type,
'Content-Length': str(len(body))}
-
+
try:
- request = urllib2.Request(self._url.encode('utf-8'), body, headers)
- response = BeautifulSoup(self._opener.open(request)).response
- except urllib2.URLError, e:
+ url = self._url
+ if sys.version_info < (3,):
+ url = self._url.encode('utf-8')
+ request = urllib_request.Request(url, body, headers)
+ resp_stream = self._opener.open(request)
+ response = BeautifulSoup(resp_stream, "xml").response
+ except urllib_request.URLError:
+ e = sys.exc_info()[1]
raise FogBugzConnectionError(e)
- except UnicodeDecodeError, e:
- print kwargs
+ except UnicodeDecodeError:
+ e = sys.exc_info()[1]
+ print(kwargs)
raise
if response.error:
@@ -134,10 +177,8 @@ if name.startswith("__"):
raise AttributeError("No such attribute '%s'" % name)
- if not self.__handlerCache.has_key(name):
+ if name not in self.__handlerCache:
def handler(**kwargs):
return self.__makerequest(name, **kwargs)
self.__handlerCache[name] = handler
return self.__handlerCache[name]
-
-
|
|
|
@@ -1,7 +1,4 @@ try:
- from ez_setup import use_setuptools
- use_setuptools()
-
from setuptools import setup
except ImportError:
from distutils.core import setup
@@ -99,7 +96,11 @@ maintainer_email='customer-service@fogcreek.com',
url='https://developers.kilnhg.com/Repo/FogBugz/Group/FogBugzPy',
download_url='https://developers.kilnhg.com/Repo/FogBugz/Group/FogBugzPy',
- install_requires=['BeautifulSoup>=3.2,<3.3.0'],
+ install_requires=[
+ 'BeautifulSoup4',
+ 'lxml',
+ 'six',
+ ],
requires='BeautifulSoup',
classifiers=[
'Development Status :: 5 - Production/Stable',
|
Loading...